DocumentFeaturesMetadataHelper.java
- /*
- * DocumentFeaturesMetadataHelper.java
- *
- * Copyright (c) 2007-2011, The University of Sheffield.
- *
- * This file is part of GATE MÃmir (see http://gate.ac.uk/family/mimir.html),
- * and is free software, licenced under the GNU Lesser General Public License,
- * Version 3, June 2007 (also included with this distribution as file
- * LICENCE-LGPL3.html).
- *
- * Valentin Tablan, 6 Oct 2009
- *
- * $Id: DocumentFeaturesMetadataHelper.java 17261 2014-01-30 14:05:14Z valyt $
- */
- package gate.mimir.util;
- import java.io.Serializable;
- import java.util.HashMap;
- import java.util.Map;
- import org.slf4j.Logger;
- import org.slf4j.LoggerFactory;
- import gate.mimir.DocumentMetadataHelper;
- import gate.mimir.index.DocumentData;
- import gate.mimir.index.GATEDocument;
- import gate.mimir.search.QueryEngine;
- /**
- * A simple {@link DocumentMetadataHelper} that copies the values of some GATE
- * document features as metadata fields in the index. Note that the values of
- * the specified features must be {@link Serializable}; values that are not will
- * not be saved in the index.
- *
- * The values thus saved can be retrieved at search time by calling
- * {@link QueryEngine#getDocumentMetadataField(int, String)}.
- */
- public class DocumentFeaturesMetadataHelper implements DocumentMetadataHelper {
-
- /**
- * A map storing the correspondence between the GATE document feature name
- * and the metadata field name in the Mimir index.
- */
- protected Map<String, String> featureNameToFieldName;
-
- private static Logger logger = LoggerFactory.getLogger(
- DocumentFeaturesMetadataHelper.class);
-
- /**
- * Creates a new DocumentFeaturesMetadataHelper.
- * @param featureNameToFieldName a map storing the correspondence between the
- * GATE document feature name and the metadata field name; keys are names of
- * document features; values are names of metadata fields.
- */
- public DocumentFeaturesMetadataHelper(
- Map<String, String> featureNameToFieldName) {
- this.featureNameToFieldName = featureNameToFieldName;
- }
-
- /**
- * Creates a new DocumentFeaturesMetadataHelper.
- * @param featureNames an array of feature names. For each indexed document,
- * the values for the features specified here are obtained and stored in the
- * index, as document metadata fields with the same names as the GATE
- * document features. If you need the names of the Mimir document metadata
- * fields to be different from the GATE document features, then you should
- * use the {@link #DocumentFeaturesMetadataHelper(Map)} variant.
- */
- public DocumentFeaturesMetadataHelper(String... featureNames) {
- this.featureNameToFieldName = new HashMap<String, String>();
- for(String f : featureNames) {
- featureNameToFieldName.put(f, f);
- }
- }
- @Override
- public void documentStart(GATEDocument document) {
- // do nothing
- }
- @Override
- public void documentEnd(GATEDocument document, DocumentData documentData) {
- for(Map.Entry<String, String> mapping : featureNameToFieldName.entrySet()) {
- Object value = document.getDocument().getFeatures().get(mapping.getKey());
- if(value instanceof Serializable) {
- documentData.putMetadataField(mapping.getValue(), (Serializable)value);
- } else if(value != null) { // null is not an instanceof anything
- logger.warn("Value for document feature \"" + mapping.getKey() +
- "\" on document with title \"" +
- (document.title() == null ? "<null>" : document.title()) +
- "\", and URI: \"" +
- (document.uri() == null ? "<null>" : document.uri()) +
- "\" is not serializable. Document metadata field NOT saved.");
- }
- }
- }
- }