// DocumentData.java
/*
* DocumentData.java
*
* Copyright (c) 2007-2011, The University of Sheffield.
*
* This file is part of GATE Mímir (see http://gate.ac.uk/family/mimir.html),
* and is free software, licenced under the GNU Lesser General Public License,
* Version 3, June 2007 (also included with this distribution as file
* LICENCE-LGPL3.html).
*
* Valentin Tablan, 15 Apr 2009
*
* $Id: DocumentData.java 17261 2014-01-30 14:05:14Z valyt $
*/
package gate.mimir.index;
import java.io.Serializable;
import java.util.HashMap;
/**
 * Holder for the per-document data that gets stored in the zip collection:
 * the document URI and title, the parallel arrays of tokens and non-tokens,
 * and an optional map of arbitrary serialisable metadata fields.
 */
public class DocumentData implements Serializable {

  /**
   * Serialisation UID
   */
  private static final long serialVersionUID = 7079350474333976576L;

  /**
   * The Document URI
   */
  protected String documentURI;

  /**
   * The Document title.
   */
  protected String documentTitle;

  /**
   * The tokens of the document.
   */
  protected String[] tokens;

  /**
   * The non-tokens (i.e. spaces) of the document.
   */
  protected String[] nonTokens;

  /**
   * Arbitrary metadata, keyed by field name (every value must be
   * {@link Serializable}). Stays {@code null} until the first field is added
   * through {@link #putMetadataField(String, Serializable)}.
   */
  protected HashMap<String, Serializable> metadata;

  /**
   * Creates a DocumentData holding the supplied values. The arrays are stored
   * as given (no copies are made).
   *
   * @param documentURI the URI of the document.
   * @param documentTitle the title of the document.
   * @param tokens the document tokens.
   * @param nonTokens the document non-tokens (i.e. spaces).
   */
  public DocumentData(String documentURI, String documentTitle,
          String[] tokens, String[] nonTokens) {
    this.documentURI = documentURI;
    this.documentTitle = documentTitle;
    this.tokens = tokens;
    this.nonTokens = nonTokens;
  }

  /**
   * Stores an arbitrary metadata field, replacing any value already held
   * under the same name. The backing map is created on first use.
   *
   * @param fieldName the name for the new field.
   * @param fieldValue the value for the new field; it must be
   *          {@link Serializable}. The metadata map is stored separately for
   *          each individual document, so care should be taken to limit the
   *          size of the object graph that gets serialised!
   */
  public void putMetadataField(String fieldName, Serializable fieldValue) {
    HashMap<String, Serializable> fields = metadata;
    if(fields == null) {
      // first metadata field for this document: create the map lazily
      fields = new HashMap<String, Serializable>();
      metadata = fields;
    }
    fields.put(fieldName, fieldValue);
  }

  /**
   * Looks up a metadata field by name.
   *
   * @param fieldName the name of the field to be returned.
   * @return the value previously stored for this field, or {@code null} if
   *         no metadata has been stored at all or the map holds no value for
   *         the given name.
   */
  public Serializable getMetadataField(String fieldName) {
    if(metadata == null) {
      return null;
    }
    return metadata.get(fieldName);
  }

  /**
   * @return the array of document tokens (the internal array, not a copy).
   */
  public String[] getTokens() {
    return tokens;
  }

  /**
   * @return the array of document non-tokens (the internal array, not a
   *         copy).
   */
  public String[] getNonTokens() {
    return nonTokens;
  }

  /**
   * Extracts a window of the document text.
   *
   * @param termPosition the index of the first position to return. It may be
   *          negative or lie past the end of the arrays; any position that
   *          falls outside an array produces a {@code null} entry for that
   *          array.
   * @param length the number of positions to return. A negative value means
   *          "everything from termPosition up to the end of the tokens
   *          array"; if termPosition is already beyond that end, the result
   *          rows are empty.
   * @return an array with two rows of equal length: row 0 holds the tokens
   *         and row 1 holds the non-tokens for the requested positions.
   */
  public String[][] getText(int termPosition, int length) {
    int count = length;
    if(count < 0) {
      // open-ended request: run to the end of the tokens, or return nothing
      // when the start position is already past the end of the document
      count = Math.max(0, tokens.length - termPosition);
    }
    String[] tokenRow = new String[count];
    String[] nonTokenRow = new String[count];
    for(int offset = 0; offset < count; offset++) {
      int position = termPosition + offset;
      // entries default to null, so only in-range positions need filling in
      if(position >= 0) {
        if(position < tokens.length) {
          tokenRow[offset] = tokens[position];
        }
        if(position < nonTokens.length) {
          nonTokenRow[offset] = nonTokens[position];
        }
      }
    }
    return new String[][] {tokenRow, nonTokenRow};
  }

  /**
   * @return the URI of the document.
   */
  public String getDocumentURI() {
    return documentURI;
  }

  /**
   * @return the title of the document.
   */
  public String getDocumentTitle() {
    return documentTitle;
  }
}