DocumentFeaturesMetadataHelper.java
/*
* DocumentFeaturesMetadataHelper.java
*
* Copyright (c) 2007-2011, The University of Sheffield.
*
* This file is part of GATE Mímir (see http://gate.ac.uk/family/mimir.html),
* and is free software, licenced under the GNU Lesser General Public License,
* Version 3, June 2007 (also included with this distribution as file
* LICENCE-LGPL3.html).
*
* Valentin Tablan, 6 Oct 2009
*
* $Id: DocumentFeaturesMetadataHelper.java 17261 2014-01-30 14:05:14Z valyt $
*/
package gate.mimir.util;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import gate.mimir.DocumentMetadataHelper;
import gate.mimir.index.DocumentData;
import gate.mimir.index.GATEDocument;
import gate.mimir.search.QueryEngine;
/**
* A simple {@link DocumentMetadataHelper} that copies the values of some GATE
* document features as metadata fields in the index. Note that the values of
* the specified features must be {@link Serializable}; values that are not will
* not be saved in the index.
*
* The values thus saved can be retrieved at search time by calling
* {@link QueryEngine#getDocumentMetadataField(int, String)}.
*/
public class DocumentFeaturesMetadataHelper implements DocumentMetadataHelper {

  /**
   * A map storing the correspondence between the GATE document feature name
   * and the metadata field name in the Mimir index. Keys are names of document
   * features; values are names of metadata fields.
   */
  protected Map<String, String> featureNameToFieldName;

  private static final Logger logger = LoggerFactory.getLogger(
      DocumentFeaturesMetadataHelper.class);

  /**
   * Creates a new DocumentFeaturesMetadataHelper.
   *
   * @param featureNameToFieldName a map storing the correspondence between the
   * GATE document feature name and the metadata field name; keys are names of
   * document features; values are names of metadata fields. The map is stored
   * by reference (it is not copied), so later changes to it are seen by this
   * helper. A <code>null</code> map is treated as "no features to copy".
   */
  public DocumentFeaturesMetadataHelper(
      Map<String, String> featureNameToFieldName) {
    this.featureNameToFieldName = featureNameToFieldName;
  }

  /**
   * Creates a new DocumentFeaturesMetadataHelper.
   *
   * @param featureNames an array of feature names. For each indexed document,
   * the values for the features specified here are obtained and stored in the
   * index, as document metadata fields with the same names as the GATE
   * document features. If you need the names of the Mimir document metadata
   * fields to be different from the GATE document features, then you should
   * use the {@link #DocumentFeaturesMetadataHelper(Map)} variant. A
   * <code>null</code> array is treated the same as an empty one.
   */
  public DocumentFeaturesMetadataHelper(String... featureNames) {
    this.featureNameToFieldName = new HashMap<String, String>();
    // a caller may explicitly pass a null array to a varargs method
    if(featureNames != null) {
      for(String name : featureNames) {
        // identity mapping: the metadata field is named after the feature
        featureNameToFieldName.put(name, name);
      }
    }
  }

  /**
   * Does nothing: all the work is done in
   * {@link #documentEnd(GATEDocument, DocumentData)}.
   *
   * @param document the document about to be indexed (ignored).
   */
  @Override
  public void documentStart(GATEDocument document) {
    // do nothing
  }

  /**
   * Copies the value of each configured document feature into the supplied
   * {@link DocumentData}, under the corresponding metadata field name.
   * Features that have no value on the document are skipped silently; features
   * whose value is not {@link Serializable} are skipped and a warning is
   * logged.
   *
   * @param document the document whose features are read.
   * @param documentData the document data that receives the metadata fields.
   */
  @Override
  public void documentEnd(GATEDocument document, DocumentData documentData) {
    if(featureNameToFieldName == null || featureNameToFieldName.isEmpty()) {
      // nothing was configured, so the document does not need to be touched
      return;
    }
    // the feature map is the same for every mapping: look it up only once
    Map<?, ?> features = document.getDocument().getFeatures();
    if(features == null) {
      // defensive: a document without a feature map has nothing to copy
      return;
    }
    for(Map.Entry<String, String> mapping : featureNameToFieldName.entrySet()) {
      Object value = features.get(mapping.getKey());
      if(value instanceof Serializable) {
        documentData.putMetadataField(mapping.getValue(), (Serializable)value);
      } else if(value != null) { // null is not an instanceof anything
        Object title = document.title();
        Object uri = document.uri();
        // An explicit Object[] is accepted by both the array-based and the
        // varargs form of Logger.warn(String, ...).
        logger.warn("Value for document feature \"{}\""
            + " on document with title \"{}\""
            + "\", and URI: \"{}\"".substring(1)
            + " is not serializable. Document metadata field NOT saved.",
            new Object[] {
                mapping.getKey(),
                title == null ? "<null>" : title,
                uri == null ? "<null>" : uri});
      }
    }
  }
}