DocumentFeaturesMetadataHelper.java

  1. /*
  2.  *  DocumentFeaturesMetadataHelper.java
  3.  *
  4.  *  Copyright (c) 2007-2011, The University of Sheffield.
  5.  *
  6.  *  This file is part of GATE Mímir (see http://gate.ac.uk/family/mimir.html),
  7.  *  and is free software, licenced under the GNU Lesser General Public License,
  8.  *  Version 3, June 2007 (also included with this distribution as file
  9.  *  LICENCE-LGPL3.html).
  10.  *
  11.  *  Valentin Tablan, 6 Oct 2009
  12.  *
  13.  *  $Id: DocumentFeaturesMetadataHelper.java 17261 2014-01-30 14:05:14Z valyt $
  14.  */
  15. package gate.mimir.util;

  16. import java.io.Serializable;
  17. import java.util.HashMap;
  18. import java.util.Map;

  19. import org.slf4j.Logger;
  20. import org.slf4j.LoggerFactory;

  21. import gate.mimir.DocumentMetadataHelper;
  22. import gate.mimir.index.DocumentData;
  23. import gate.mimir.index.GATEDocument;
  24. import gate.mimir.search.QueryEngine;


  25. /**
  26.  * A simple {@link DocumentMetadataHelper} that copies the values of some GATE
  27.  * document features as metadata fields in the index. Note that the values of
  28.  * the specified features must be {@link Serializable}; values that are not will
  29.  * not be saved in the index.
  30.  *
  31.  * The values thus saved can be retrieved at search time by calling
  32.  * {@link QueryEngine#getDocumentMetadataField(int, String)}.
  33.  */
  34. public class DocumentFeaturesMetadataHelper implements DocumentMetadataHelper {
  35.  
  36.   /**
  37.    * A map storing the correspondence between the  GATE document feature name
  38.    * and the metadata field name in the Mimir index.
  39.    */
  40.   protected Map<String, String> featureNameToFieldName;
  41.  
  42.   private static Logger logger = LoggerFactory.getLogger(
  43.           DocumentFeaturesMetadataHelper.class);
  44.  
  45.   /**
  46.    * Creates a new DocumentFeaturesMetadataHelper.
  47.    * @param featureNameToFieldName a map storing the correspondence between the
  48.    * GATE document feature name and the metadata field name; keys are names of
  49.    * document features; values are names of metadata fields.
  50.    */
  51.   public DocumentFeaturesMetadataHelper(
  52.           Map<String, String> featureNameToFieldName) {
  53.     this.featureNameToFieldName = featureNameToFieldName;
  54.   }
  55.  
  56.   /**
  57.    * Creates a new DocumentFeaturesMetadataHelper.
  58.    * @param featureNames an array of feature names. For each indexed document,
  59.    * the values for the features specified here are obtained and stored in the
  60.    *  index, as document metadata fields with the same names as the GATE
  61.    *  document features. If you need the names of the Mimir document metadata
  62.    *  fields to be different from the GATE document features, then you should
  63.    *  use the {@link #DocumentFeaturesMetadataHelper(Map)} variant.
  64.    */  
  65.   public DocumentFeaturesMetadataHelper(String... featureNames) {
  66.     this.featureNameToFieldName = new HashMap<String, String>();
  67.     for(String f : featureNames) {
  68.       featureNameToFieldName.put(f, f);
  69.     }
  70.   }

  71.   @Override
  72.   public void documentStart(GATEDocument document) {
  73.     // do nothing
  74.   }

  75.   @Override
  76.   public void documentEnd(GATEDocument document, DocumentData documentData) {
  77.     for(Map.Entry<String, String> mapping : featureNameToFieldName.entrySet()) {
  78.       Object value = document.getDocument().getFeatures().get(mapping.getKey());
  79.       if(value instanceof Serializable) {
  80.         documentData.putMetadataField(mapping.getValue(), (Serializable)value);
  81.       } else if(value != null) { // null is not an instanceof anything
  82.         logger.warn("Value for document feature \"" + mapping.getKey() +
  83.                 "\" on document with title \"" +
  84.                 (document.title() == null ? "<null>" : document.title()) +
  85.                 "\", and URI: \"" +
  86.                 (document.uri() == null ? "<null>" : document.uri()) +
  87.                 "\" is not serializable. Document metadata field NOT saved.");
  88.       }
  89.     }
  90.   }
  91. }