GATEDocumentFactory.java
- /*
- * GATEDocumentFactory.java
- *
- * Copyright (c) 2007-2011, The University of Sheffield.
- *
- * This file is part of GATE MÃmir (see http://gate.ac.uk/family/mimir.html),
- * and is free software, licenced under the GNU Lesser General Public License,
- * Version 3, June 2007 (also included with this distribution as file
- * LICENCE-LGPL3.html).
- *
- * Valentin Tablan, 15 Apr 2009
- *
- * $Id: GATEDocumentFactory.java 17261 2014-01-30 14:05:14Z valyt $
- */
- package gate.mimir.index;
- import gate.mimir.IndexConfig;
- import it.unimi.dsi.fastutil.objects.Reference2ObjectMap;
- import it.unimi.di.big.mg4j.document.Document;
- import it.unimi.di.big.mg4j.document.DocumentFactory;
- import java.io.IOException;
- import java.io.InputStream;
- /**
- * An MG4J {@link DocumentFactory} for GATE documents, configured according
- * to the current indexing requirements.
- */
- public class GATEDocumentFactory implements DocumentFactory{
- /**
- *
- */
- private static final long serialVersionUID = 6070650764387229146L;
-
- /**
- * The index configuration.
- */
- private IndexConfig indexConfig;
-
- public GATEDocumentFactory(IndexConfig indexConfig){
- this.indexConfig = indexConfig;
- }
-
- /* (non-Javadoc)
- * @see it.unimi.dsi.mg4j.document.DocumentFactory#copy()
- */
- public DocumentFactory copy() {
- throw new UnsupportedOperationException(getClass().getName() +
- " does not support copying!");
- }
- /* (non-Javadoc)
- * @see it.unimi.dsi.mg4j.document.DocumentFactory#fieldIndex(java.lang.String)
- */
- public int fieldIndex(String fieldName) {
- for(int i = 0; i < indexConfig.getTokenIndexers().length; i++){
- if(indexConfig.getTokenIndexers()[i].getFeatureName().equals(fieldName)){
- return i;
- }
- }
- return -1;
- }
- /* (non-Javadoc)
- * @see it.unimi.dsi.mg4j.document.DocumentFactory#fieldName(int)
- */
- public String fieldName(int field) {
- return indexConfig.getTokenIndexers()[field].getFeatureName();
- }
- /* (non-Javadoc)
- * @see it.unimi.dsi.mg4j.document.DocumentFactory#fieldType(int)
- */
- public FieldType fieldType(int field) {
- // all GATE fields are TEXT
- return FieldType.TEXT;
- }
- /* (non-Javadoc)
- * @see it.unimi.dsi.mg4j.document.DocumentFactory#getDocument(java.io.InputStream, it.unimi.dsi.fastutil.objects.Reference2ObjectMap)
- */
- public Document getDocument(InputStream rawContent,
- Reference2ObjectMap<Enum<?>, Object> metadata) throws IOException {
- //we do not support reading of documents from streams
- throw new UnsupportedOperationException(getClass().getName() +
- " does not support reading from streams!");
- }
- /* (non-Javadoc)
- * @see it.unimi.dsi.mg4j.document.DocumentFactory#numberOfFields()
- */
- public int numberOfFields() {
- return indexConfig.getTokenIndexers().length;
- }
- }