TermQuery.java
/*
* TermQuery.java
*
* Copyright (c) 2007-2011, The University of Sheffield.
*
* This file is part of GATE Mímir (see http://gate.ac.uk/family/mimir.html),
* and is free software, licenced under the GNU Lesser General Public License,
* Version 3, June 2007 (also included with this distribution as file
* LICENCE-LGPL3.html).
*
* Valentin Tablan, 03 Mar 2009
*
* $Id: TermQuery.java 20208 2017-04-19 08:35:28Z domrout $
*/
package gate.mimir.search.query;
import gate.mimir.IndexConfig;
import gate.mimir.index.AtomicIndex;
import gate.mimir.search.IndexReaderPool;
import gate.mimir.search.QueryEngine;
import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
import it.unimi.dsi.fastutil.objects.ReferenceSet;
import it.unimi.dsi.lang.MutableString;
import it.unimi.di.big.mg4j.index.Index;
import it.unimi.di.big.mg4j.index.IndexIterator;
import it.unimi.di.big.mg4j.index.IndexIterators;
import it.unimi.di.big.mg4j.index.IndexReader;
import it.unimi.di.big.mg4j.index.payload.Payload;
import it.unimi.di.big.mg4j.search.DocumentIterator;
import it.unimi.di.big.mg4j.search.IntervalIterator;
import it.unimi.di.big.mg4j.search.visitor.DocumentIteratorVisitor;
import java.io.IOException;
import static gate.mimir.search.QueryEngine.IndexType;
/**
* A {@link QueryNode} for term queries. A term query consists of an index name
* and a query term.
*/
public class TermQuery implements QueryNode {
/**
 * Version identifier for Java serialization (assumes {@link QueryNode} is
 * serializable; its declaration is not visible in this file).
 */
private static final long serialVersionUID = 7302348587893649887L;
/**
 * The query term, exactly as supplied to the constructor. The executor runs
 * a copy of it through the index's term processor before searching.
 */
private String term;
/**
 * The term ID for this query. If not known,
 * {@link DocumentIterator#END_OF_LIST} is used, in which case the executor
 * looks the term up by its (processed) string value instead.
 */
private long termId = DocumentIterator.END_OF_LIST;
/**
 * The name of the index to search.
 */
private String indexName;
/**
 * The type of the index to be searched.
 */
private IndexType indexType;
/**
 * The length of the matches; copied into every {@link Binding} produced for
 * this query. The convenience constructors for token indexes pass
 * {@code 1}; the field itself has no explicit default.
 */
private int length;
/**
* A {@link QueryExecutor} for {@link TermQuery} nodes.
*/
public static class TermQueryExecutor extends AbstractQueryExecutor implements IndexIterator{
/**
 * The {@link TermQuery} node being executed.
 */
private TermQuery query;
/**
 * The {@link AtomicIndex} being searched, obtained from the
 * {@link QueryEngine} through {@link TermQuery#getIndex(QueryEngine)}.
 */
private AtomicIndex atomicIndex;
/**
 * The {@link IndexReader} obtained from the MG4J index of
 * {@link #atomicIndex}. Stays {@code null} when that MG4J index is
 * {@code null} at construction time; closed by {@link #close()}.
 */
private IndexReader indexReader;
/**
 * The index iterator used to obtain hits. It is {@code null} when the
 * atomic index had no MG4J index at construction time, and is reset to
 * {@code null} by {@link #close()}.
 */
private IndexIterator indexIterator;
/**
 * The positions iterator for the latest document; {@code null} until a
 * document has been reached.
 */
private IntIterator positionsIterator;
/**
 * Creates an executor for the given term query. The query term is run
 * through the term processor of the MG4J index before being used. When the
 * query carries a term ID the postings are opened by ID (and the processed
 * term is attached to the iterator, for use by rankers); otherwise they are
 * opened by the processed term string.
 *
 * If the atomic index has no MG4J index (i.e. it is empty), no reader is
 * opened and the executor starts out exhausted.
 *
 * @param node the {@link TermQuery} to execute.
 * @param engine the {@link QueryEngine} supplying the index to search.
 * @throws IOException if the index files cannot be accessed.
 * @throws IllegalArgumentException if the engine has no index for the
 * query's index name.
 */
public TermQueryExecutor(TermQuery node, QueryEngine engine) throws IOException {
  super(engine, node);
  this.query = node;
  atomicIndex = query.getIndex(engine);
  if(atomicIndex == null) throw new IllegalArgumentException(
      "No index provided for field " + node.getIndexName() + "!");
  positionsIterator = null;
  Index mg4jIndex = atomicIndex.getIndex();
  if(mg4jIndex == null) {
    // the atomic index is empty: we have exhausted the search already
    latestDocument = -1;
    return;
  }
  indexReader = mg4jIndex.getReader();
  // Tracks whether the iterator was fully set up, so that the reader can be
  // released if anything below fails (nobody else holds a reference to it).
  boolean iteratorReady = false;
  try {
    // use the term processor for the query term
    MutableString processedTerm = new MutableString(query.getTerm());
    mg4jIndex.termProcessor.processTerm(processedTerm);
    if(query.termId != DocumentIterator.END_OF_LIST) {
      // if we have the term ID, use that
      indexIterator = indexReader.documents(query.termId);
      // set the term (used by rankers)
      indexIterator.term(processedTerm);
    } else {
      indexIterator = indexReader.documents(processedTerm.toString());
    }
    iteratorReady = true;
  } finally {
    if(!iteratorReady) {
      try {
        indexReader.close();
      } catch(IOException ignored) {
        // the failure that brought us here is the one worth reporting
      }
    }
  }
}
/**
 * Moves this executor to its next matching document. When {@code from} is
 * at or beyond the current document the underlying iterator is skipped to
 * {@code from + 1}; otherwise it is simply advanced to its next document.
 * On success a fresh positions iterator is prepared for the new document.
 *
 * @param from the document ID after which to look for matches.
 * @return the ID of the document reached, or {@code -1} if this executor
 * is closed or has no more documents.
 * @throws IOException if the index files cannot be accessed.
 * @see gate.mimir.search.query.QueryExecutor
 */
public long nextDocument(long from) throws IOException {
  // Check the terminal states first: when the atomic index was empty at
  // construction time there is no MG4J index (and no iterator) to consult,
  // so the size test below must not be reached.
  if(closed || latestDocument == -1) return latestDocument = -1;
  if(from + 1 >= atomicIndex.getIndex().numberOfDocuments) {
    // nothing can exist beyond the last document
    return latestDocument = -1;
  }
  if(from >= latestDocument) {
    // we do need to skip
    latestDocument = indexIterator.skipTo(from + 1);
  } else {
    // from is lower than latest document, so we just return the next one
    latestDocument = indexIterator.nextDocument();
  }
  if(latestDocument == DocumentIterator.END_OF_LIST) {
    // no more documents available
    latestDocument = -1;
  } else {
    positionsIterator = IndexIterators.positionIterator(indexIterator);
  }
  return latestDocument;
}
/**
 * Returns the next hit on the current document.
 *
 * @return a {@link Binding} for the next position of the term in the
 * current document, using the query's length; or {@code null} if this
 * executor is closed, is not positioned on a document, or the current
 * document has no more positions.
 * @throws IOException if the index files cannot be accessed.
 * @see gate.mimir.search.query.QueryExecutor
 */
public Binding nextHit() throws IOException {
  // Without a current document there is nothing to enumerate; return before
  // touching indexIterator, which may be null (empty index) or unpositioned.
  if(closed || latestDocument < 0) return null;
  if(positionsIterator == null) {
    positionsIterator = IndexIterators.positionIterator(indexIterator);
  }
  if(!positionsIterator.hasNext()) {
    // no more positions on this document
    return null;
  }
  int position = positionsIterator.nextInt();
  return new Binding(query, latestDocument, position, query.length, null);
}
/**
 * Releases the resources held by this executor: invokes the superclass
 * {@code close()}, drops the reference to the index iterator and closes the
 * index reader, if one was opened. Does nothing when already closed.
 *
 * @throws IOException if closing the superclass or the reader fails.
 * @see gate.mimir.search.query.QueryExecutor#close()
 */
public void close() throws IOException {
  if(!closed) {
    super.close();
    indexIterator = null;
    if(indexReader != null) {
      indexReader.close();
    }
  }
}
/**
 * Not supported by this executor: positions are exposed through
 * {@link #nextHit()} instead.
 *
 * @throws UnsupportedOperationException always.
 * @see it.unimi.di.big.mg4j.index.IndexIterator#nextPosition()
 */
@Override
public int nextPosition() throws IOException {
// Deliberately unimplemented; see the Javadoc above.
throw new UnsupportedOperationException("Method not implemented!");
}
/**
 * Iterator-style operation that this executor does not support.
 *
 * @throws UnsupportedOperationException always.
 */
public boolean hasNext() {
throw new UnsupportedOperationException("Method not implemented!");
}
/**
 * Iterator-style operation that this executor does not support.
 *
 * @throws UnsupportedOperationException always.
 */
public Integer next() {
throw new UnsupportedOperationException("Method not implemented!");
}
/**
 * Iterator-style operation that this executor does not support.
 *
 * @throws UnsupportedOperationException always.
 */
public void remove() {
throw new UnsupportedOperationException("Method not implemented!");
}
/*
 * The methods below forward straight to the wrapped indexIterator.
 * NOTE: indexIterator is null when the atomic index had no MG4J index at
 * construction time, and again after close(); in those states every method
 * in this group fails with a NullPointerException.
 */
/** Forwards to {@code indexIterator.index()}. */
public Index index() {
return indexIterator.index();
}
/** Forwards to {@code indexIterator.intervalIterator()}. */
public IntervalIterator intervalIterator() throws IOException {
return indexIterator.intervalIterator();
}
/** Forwards to {@code indexIterator.frequency()}. */
public long frequency() throws IOException {
return indexIterator.frequency();
}
/** Forwards to {@code indexIterator.intervalIterator(index)}. */
public IntervalIterator intervalIterator(Index index) throws IOException {
return indexIterator.intervalIterator(index);
}
/** Forwards to {@code indexIterator.payload()}. */
public Payload payload() throws IOException {
return indexIterator.payload();
}
/** Forwards to {@code indexIterator.count()}. */
public int count() throws IOException {
return indexIterator.count();
}
/** Forwards to {@code indexIterator.intervalIterators()}. */
public Reference2ReferenceMap<Index, IntervalIterator> intervalIterators()
throws IOException {
return indexIterator.intervalIterators();
}
/** Forwards to {@code indexIterator.indices()}. */
public ReferenceSet<Index> indices() {
return indexIterator.indices();
}
/**
 * Forwards to {@code indexIterator.id(id)}. Note that the value returned is
 * whatever the wrapped iterator returns, not this executor.
 */
public IndexIterator id(int id) {
return indexIterator.id(id);
}
/**
 * Forwards to {@code indexIterator.nextDocument()}. Unlike
 * {@link #nextDocument(long)}, this does not update the executor's
 * {@code latestDocument} or {@code positionsIterator}.
 */
public long nextDocument() throws IOException {
return indexIterator.nextDocument();
}
/** Forwards to {@code indexIterator.id()}. */
public int id() {
return indexIterator.id();
}
/** Forwards to {@code indexIterator.document()}. */
public long document() {
return indexIterator.document();
}
/** Forwards the visitor to {@code indexIterator.accept(visitor)}. */
public <T> T accept(DocumentIteratorVisitor<T> visitor) throws IOException {
return indexIterator.accept(visitor);
}
/** Forwards the visitor to {@code indexIterator.acceptOnTruePaths(visitor)}. */
public <T> T acceptOnTruePaths(DocumentIteratorVisitor<T> visitor)
throws IOException {
return indexIterator.acceptOnTruePaths(visitor);
}
/**
 * Forwards to {@code indexIterator.dispose()}. This does not mark the
 * executor as closed; use {@link #close()} for that.
 */
public void dispose() throws IOException {
indexIterator.dispose();
}
/** Forwards to {@code indexIterator.termNumber()}. */
public long termNumber() {
return indexIterator.termNumber();
}
/** Forwards to {@code indexIterator.term()}. */
public String term() {
return indexIterator.term();
}
/**
 * Forwards to {@code indexIterator.term(term)}. Note that the value
 * returned is whatever the wrapped iterator returns, not this executor.
 */
public IndexIterator term(CharSequence term) {
return indexIterator.term(term);
}
/**
 * Forwards to {@code indexIterator.weight(weight)}. Note that the value
 * returned is whatever the wrapped iterator returns, not this executor.
 */
public IndexIterator weight(double weight) {
return indexIterator.weight(weight);
}
/**
 * Forwards to {@code indexIterator.skipTo(n)}. Unlike
 * {@link #nextDocument(long)}, this does not update the executor's
 * {@code latestDocument} or {@code positionsIterator}.
 */
public long skipTo(long n) throws IOException {
return indexIterator.skipTo(n);
}
/** Forwards to {@code indexIterator.weight()}. */
public double weight() {
return indexIterator.weight();
}
}
/**
 * Gets the query term, as supplied to the constructor (i.e. before any term
 * processing).
 * @return the term
 */
public CharSequence getTerm() {
return term;
}
/**
 * Gets the term ID for this query.
 * @return the termId, or {@link DocumentIterator#END_OF_LIST} if none was
 * supplied.
 */
public long getTermId() {
return termId;
}
/**
 * Gets the name of the index searched by this query.
 * @return the indexName
 */
public String getIndexName() {
return indexName;
}
/**
 * Gets the index for this query in a given {@link QueryEngine}. The lookup
 * uses this query's index name, against the engine's token indexes for
 * {@link IndexType#TOKENS} and against its annotation indexes for
 * {@link IndexType#ANNOTATIONS}.
 *
 * @param engine the query engine to obtain the index from.
 * @return whatever the engine returns for this query's index name.
 * @throws IllegalArgumentException if this query's index type is
 * {@code null} or is not one of the supported types.
 */
public AtomicIndex getIndex(QueryEngine engine) {
  // Identity comparisons (rather than a switch) so that a null index type is
  // reported as an unsupported type instead of a NullPointerException.
  if(indexType == IndexType.TOKENS) {
    return engine.getTokenIndex(indexName);
  }
  if(indexType == IndexType.ANNOTATIONS) {
    return engine.getAnnotationIndex(indexName);
  }
  throw new IllegalArgumentException("Indexes of type " +
      indexType + " are not supported!");
}
/**
 * Creates a new term query, for searching over the document text. The
 * query targets a {@link IndexType#TOKENS} index and uses a match length of
 * 1; no term ID is set.
 *
 * @param indexName the name of the index to be searched. This should be one
 * of the annotation feature names used for indexing tokens (see
 * {@link IndexConfig.TokenIndexerConfig}).
 *
 * @param term the term to be searched for.
 *
 * @see IndexConfig.TokenIndexerConfig
 */
public TermQuery(String indexName, String term) {
this(IndexType.TOKENS, indexName, term, 1);
}
/**
 * Creates a new term query, for searching over the document text, using a
 * known term ID. The query targets a {@link IndexType#TOKENS} index and
 * uses a match length of 1.
 *
 * @param indexName the name of the index to be searched. This should be one
 * of the annotation feature names used for indexing tokens (see
 * {@link IndexConfig.TokenIndexerConfig}).
 *
 * @param term the string form of the term to be searched for.
 *
 * @param termId the term ID for the term to be searched for.
 *
 * @see IndexConfig.TokenIndexerConfig
 */
public TermQuery(String indexName, String term, long termId) {
this(IndexType.TOKENS, indexName, term, termId, 1);
}
/**
 * Creates a new term query, for searching over semantic annotations.
 *
 * Note: this overload differs from
 * {@link #TermQuery(String, String, long)} only in the type of the last
 * argument; an {@code int} argument selects this constructor, a
 * {@code long} argument selects the token/term-ID one.
 *
 * @param annotationType the type of annotation sought. This should be one of
 * the annotation types used when indexing semantic annotations (see
 * {@link IndexConfig.SemanticIndexerConfig}).
 *
 * @param mentionURI the URI of the mention sought.
 *
 * @param length the length of the mention sought.
 */
public TermQuery(String annotationType, String mentionURI, int length) {
this(IndexType.ANNOTATIONS, annotationType, mentionURI, length);
}
/**
 * Creates a new term query, for searching over semantic annotations, using
 * a known term ID.
 *
 * @param annotationType the type of annotation sought. This should be one of
 * the annotation types used when indexing semantic annotations (see
 * {@link IndexConfig.SemanticIndexerConfig}).
 *
 * @param term the string form of the term (the mention URI) sought.
 *
 * @param mentionTermid the term ID for the mentionURI sought.
 *
 * @param length the length of the mention sought.
 */
public TermQuery(String annotationType, String term, long mentionTermid, int length) {
this(IndexType.ANNOTATIONS, annotationType, term, mentionTermid, length);
}
/**
 * Low-level constructor for a term query whose term ID is not known; the
 * other constructors of this class may be more suitable. The term ID is
 * left at {@link DocumentIterator#END_OF_LIST}.
 *
 * @param indexType The type of index to be searched.
 *
 * @param indexName the name of the index to be searched. For
 * {@link IndexType#TOKENS} the name is interpreted as the feature name for
 * the document tokens; for {@link IndexType#ANNOTATIONS} it is interpreted
 * as the annotation type.
 *
 * @param term the term to be searched for.
 *
 * @param length the length of the hits (useful in the case of annotation
 * indexes, where the length of each mention is stored external to the actual
 * index).
 */
public TermQuery(IndexType indexType, String indexName, String term, int length) {
  // END_OF_LIST is the "term ID unknown" marker, same as the field default
  this(indexType, indexName, term, DocumentIterator.END_OF_LIST, length);
}
/**
 * Low-level constructor for a term query with a known term ID; the other
 * constructors of this class may be more suitable.
 *
 * @param indexType The type of index to be searched.
 *
 * @param indexName the name of the index to be searched. For
 * {@link IndexType#TOKENS} the name is interpreted as the feature name for
 * the document tokens; for {@link IndexType#ANNOTATIONS} it is interpreted
 * as the annotation type.
 *
 * @param term the string form of the sought term.
 *
 * @param termId the term ID for sought term.
 *
 * @param length the length of the hits (useful in the case of annotation
 * indexes, where the length of each mention is stored external to the actual
 * index).
 */
public TermQuery(IndexType indexType, String indexName, String term, long termId, int length) {
  // what to look for
  this.term = term;
  this.termId = termId;
  // where to look for it
  this.indexName = indexName;
  this.indexType = indexType;
  // how long each reported hit is
  this.length = length;
}
/**
 * Gets a new query executor for this {@link TermQuery}.
 * @param engine the {@link QueryEngine} in which the query is executed; it
 * supplies the index to search.
 * @return an appropriate {@link QueryExecutor} (in this case, a new instance
 * of {@link TermQueryExecutor}).
 * @throws IOException if the index files cannot be accessed.
 * @throws IllegalArgumentException if the engine does not provide an index
 * for this query's {@link #indexName}.
 * @see gate.mimir.search.query.QueryNode
 */
public QueryExecutor getQueryExecutor(QueryEngine engine) throws IOException {
return new TermQueryExecutor(this, engine);
}
/**
 * Renders this query as {@code TERM(indexName:term)}. A {@code null} index
 * name is rendered as the empty string.
 *
 * @return the textual form of this query.
 */
public String toString() {
  StringBuilder text = new StringBuilder("TERM(");
  if(indexName != null) {
    text.append(indexName);
  }
  text.append(":").append(term).append(")");
  return text.toString();
}
/**
 * Gets the type of the index searched by this query.
 * @return the index type
 */
public IndexType getIndexType() {
return indexType;
}
/**
 * Gets the length used for the hits produced by this query.
 * @return the length
 */
public int getLength() {
return length;
}
}