GATEDocumentFactory.java

  1. /*
  2.  *  GATEDocumentFactory.java
  3.  *
  4.  *  Copyright (c) 2007-2011, The University of Sheffield.
  5.  *
  6.  *  This file is part of GATE Mímir (see http://gate.ac.uk/family/mimir.html),
  7.  *  and is free software, licenced under the GNU Lesser General Public License,
  8.  *  Version 3, June 2007 (also included with this distribution as file
  9.  *  LICENCE-LGPL3.html).
  10.  *
  11.  *  Valentin Tablan, 15 Apr 2009
  12.  *
  13.  *  $Id: GATEDocumentFactory.java 17261 2014-01-30 14:05:14Z valyt $
  14.  */
  15. package gate.mimir.index;

  16. import gate.mimir.IndexConfig;
  17. import it.unimi.dsi.fastutil.objects.Reference2ObjectMap;
  18. import it.unimi.di.big.mg4j.document.Document;
  19. import it.unimi.di.big.mg4j.document.DocumentFactory;

  20. import java.io.IOException;
  21. import java.io.InputStream;


  22. /**
  23.  * An MG4J {@link DocumentFactory} for GATE documents, configured according
  24.  * to the current indexing requirements.
  25.  */
  26. public class GATEDocumentFactory implements DocumentFactory{

  27.   /**
  28.    *
  29.    */
  30.   private static final long serialVersionUID = 6070650764387229146L;
  31.  
  32.   /**
  33.    * The index configuration.
  34.    */
  35.   private IndexConfig indexConfig;
  36.  
  37.   public GATEDocumentFactory(IndexConfig indexConfig){
  38.     this.indexConfig = indexConfig;
  39.   }
  40.  
  41.   /* (non-Javadoc)
  42.    * @see it.unimi.dsi.mg4j.document.DocumentFactory#copy()
  43.    */
  44.   public DocumentFactory copy() {
  45.     throw new UnsupportedOperationException(getClass().getName() +
  46.             " does not support copying!");
  47.   }

  48.   /* (non-Javadoc)
  49.    * @see it.unimi.dsi.mg4j.document.DocumentFactory#fieldIndex(java.lang.String)
  50.    */
  51.   public int fieldIndex(String fieldName) {
  52.     for(int i = 0; i < indexConfig.getTokenIndexers().length; i++){
  53.       if(indexConfig.getTokenIndexers()[i].getFeatureName().equals(fieldName)){
  54.         return i;
  55.       }
  56.     }
  57.     return -1;
  58.   }

  59.   /* (non-Javadoc)
  60.    * @see it.unimi.dsi.mg4j.document.DocumentFactory#fieldName(int)
  61.    */
  62.   public String fieldName(int field) {
  63.     return indexConfig.getTokenIndexers()[field].getFeatureName();
  64.   }

  65.   /* (non-Javadoc)
  66.    * @see it.unimi.dsi.mg4j.document.DocumentFactory#fieldType(int)
  67.    */
  68.   public FieldType fieldType(int field) {
  69.     // all GATE fields are TEXT
  70.     return FieldType.TEXT;
  71.   }

  72.   /* (non-Javadoc)
  73.    * @see it.unimi.dsi.mg4j.document.DocumentFactory#getDocument(java.io.InputStream, it.unimi.dsi.fastutil.objects.Reference2ObjectMap)
  74.    */
  75.   public Document getDocument(InputStream rawContent,
  76.           Reference2ObjectMap<Enum<?>, Object> metadata) throws IOException {
  77.     //we do not support reading of documents from streams
  78.     throw new UnsupportedOperationException(getClass().getName() +
  79.             " does not support reading from streams!");
  80.   }

  81.   /* (non-Javadoc)
  82.    * @see it.unimi.dsi.mg4j.document.DocumentFactory#numberOfFields()
  83.    */
  84.   public int numberOfFields() {
  85.     return indexConfig.getTokenIndexers().length;
  86.   }
  87. }