MG4JTools.java
/*
* MG4JTools.java
*
* Copyright (c) 2007-2012, The University of Sheffield.
*
* This file is part of GATE Mímir (see http://gate.ac.uk/family/mimir.html),
* and is free software, licenced under the GNU Lesser General Public License,
* Version 3, June 2007 (also included with this distribution as file
* LICENCE-LGPL3.html).
*
* Valentin Tablan, 12 Jul 2012
*
* $Id: MG4JTools.java 17206 2013-12-24 16:30:52Z valyt $
*/
package gate.mimir.util;
import gate.mimir.index.AtomicIndex;
import gate.mimir.search.QueryEngine;
import it.unimi.di.big.mg4j.index.DiskBasedIndex;
import it.unimi.di.big.mg4j.index.Index;
import it.unimi.di.big.mg4j.index.Index.UriKeys;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.util.Properties;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.commons.configuration.ConfigurationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Class providing utility methods for working with MG4J indexes: upgrading
 * the on-disk files of an index written by an older version, and opening an
 * index given its base name URI.
 */
public class MG4JTools {

  protected static final Logger logger = LoggerFactory.getLogger(MG4JTools.class);

  /**
   * Package prefix of the old (standard) MG4J classes, as it may appear in the
   * values of an old index .properties file.
   */
  private static final String OLD_MG4J_PACKAGE = "it.unimi.dsi.mg4j";

  /**
   * Package prefix of the MG4J-big classes. This must be the package the
   * classes actually live in (the same one this file imports
   * {@link DiskBasedIndex} and {@link Index} from), otherwise the rewritten
   * properties file would name classes that cannot be loaded.
   */
  private static final String BIG_MG4J_PACKAGE = "it.unimi.di.big.mg4j";

  /** Extension appended to the name of each old file that gets backed up. */
  private static final String BACKUP_EXTENSION = ".32bit";

  /**
   * Given a index URI (a file URI denoting the index base name for all the
   * index files), this method checks if the index is an older version, and
   * upgrades it to the current version, making sure it can be opened.
   *
   * Two things are checked: the term map file (regenerated if it holds an
   * old-style {@code it.unimi.dsi.util.StringMap}) and the .properties file
   * (any value mentioning the old MG4J package is rewritten to the MG4J-big
   * package). In both cases the old file is first renamed with a
   * <code>.32bit</code> extension.
   *
   * @param indexUri the file URI denoting the base name of the index files.
   * @throws IOException if a file cannot be read, or an old file cannot be
   *           renamed to its backup name.
   * @throws ClassNotFoundException if the term map file cannot be
   *           deserialised.
   * @throws ConfigurationException if the properties file cannot be loaded or
   *           saved.
   */
  public static void upgradeIndex(URI indexUri) throws IOException,
      ClassNotFoundException, ConfigurationException {
    upgradeTermMap(indexUri);
    upgradeProperties(indexUri);
  }

  /**
   * Regenerates the term map of the index if the existing one is in the old
   * (32 bits) format, keeping the old file as a backup.
   */
  private static void upgradeTermMap(URI indexUri) throws IOException,
      ClassNotFoundException, ConfigurationException {
    File termMapFile = indexFile(indexUri, DiskBasedIndex.TERMMAP_EXTENSION);
    Object termMap = BinIO.loadObject(termMapFile);
    if(!(termMap instanceof it.unimi.dsi.util.StringMap)) {
      // not an old-style term map: nothing to do
      return;
    }
    // 32 bit index: save the old termmap
    logger.warn("Old index format detected (32 bits term map file); " +
        "converting to new version. Old files will be backed up with " +
        "a .32bit extension.");
    File backupFile = indexFile(indexUri,
        DiskBasedIndex.TERMMAP_EXTENSION + BACKUP_EXTENSION);
    if(!termMapFile.renameTo(backupFile)) {
      throw new IOException("Could not rename old termmap file (" +
          termMapFile.getAbsolutePath() + ").");
    }
    // and generate the new one, from the terms file
    File termsFile = indexFile(indexUri, DiskBasedIndex.TERMS_EXTENSION);
    AtomicIndex.generateTermMap(termsFile, termMapFile, null);
  }

  /**
   * Replaces all mentions of the old MG4J package in the values of the index
   * .properties file with the MG4J-big package, keeping the old file as a
   * backup. The file is only touched if at least one value needs changing.
   */
  private static void upgradeProperties(URI indexUri) throws IOException,
      ClassNotFoundException, ConfigurationException {
    File propsFile = indexFile(indexUri, DiskBasedIndex.PROPERTIES_EXTENSION);
    Properties indexProps = new Properties(propsFile);
    indexProps.setAutoSave(false);
    // collect the replacements first, and only apply them after the iteration
    // over the keys has finished
    Map<String, String> newVals = new LinkedHashMap<String, String>();
    Iterator<String> keysIter = indexProps.getKeys();
    while(keysIter.hasNext()) {
      String key = keysIter.next();
      Object value = indexProps.getProperty(key);
      if(value instanceof String
          && ((String)value).indexOf(OLD_MG4J_PACKAGE) >= 0) {
        newVals.put(key,
            ((String)value).replace(OLD_MG4J_PACKAGE, BIG_MG4J_PACKAGE));
      }
    }
    if(newVals.isEmpty()) {
      // no old class names mentioned: nothing to do
      return;
    }
    // save a backup
    logger.warn("Old index format detected (32 bits properties file); " +
        "converting to new version. Old files will be backed up with " +
        "a .32bit extension.");
    File backupFile = indexFile(indexUri,
        DiskBasedIndex.PROPERTIES_EXTENSION + BACKUP_EXTENSION);
    if(!propsFile.renameTo(backupFile)) {
      throw new IOException("Could not rename old properties file (" +
          propsFile.getAbsolutePath() + ").");
    }
    // update the properties values
    for(Map.Entry<String, String> newEntry : newVals.entrySet()) {
      indexProps.setProperty(newEntry.getKey(), newEntry.getValue());
    }
    // save the changed props
    indexProps.save();
  }

  /**
   * Builds the {@link File} for one of the files of an index, by appending
   * the given extension to the index base name URI.
   */
  private static File indexFile(URI indexUri, String extension) {
    return new File(URI.create(indexUri.toString() + extension));
  }

  /**
   * Opens one MG4J index. A memory-mapped open is attempted first; if that
   * fails with an {@link IOException}, the index is opened again without any
   * URI options.
   *
   * @param indexUri a URI denoting the basename for the index (a file path
   * with the correct basename, but no extension).
   *
   * @return the MG4J {@link Index} object.
   * @throws ConfigurationException
   * @throws SecurityException
   * @throws IOException
   * @throws URISyntaxException
   * @throws ClassNotFoundException
   * @throws InstantiationException
   * @throws IllegalAccessException
   * @throws InvocationTargetException
   * @throws NoSuchMethodException
   */
  public static Index openMg4jIndex(URI indexUri)
      throws ConfigurationException, SecurityException, IOException,
      URISyntaxException, ClassNotFoundException, InstantiationException,
      IllegalAccessException, InvocationTargetException, NoSuchMethodException {
    String basename = indexUri.toString();
    // Option names are lower-cased independently of the default locale.
    // NOTE(review): the offset step is passed negated; presumably this is the
    // MG4J convention for memory-mapped offsets - confirm against the
    // DiskBasedIndex documentation.
    String options = "?"
        + UriKeys.MAPPED.name().toLowerCase(java.util.Locale.ROOT) + "=1;"
        + UriKeys.OFFSETSTEP.toString().toLowerCase(java.util.Locale.ROOT)
        + "=-" + DiskBasedIndex.DEFAULT_OFFSET_STEP;
    try {
      logger.debug("Opening index: {}{}", basename, options);
      return Index.getInstance(basename + options, true, true);
    } catch(IOException e) {
      // memory mapping failed: keep the cause in the log so that the reason
      // for the fallback can be diagnosed
      logger.info("Memory mapping failed for index " + basename
          + ". Loading as file index instead", e);
      // now try to just open it as an on-disk index
      return Index.getInstance(basename, true, true);
    }
  }
}