// TermQuery.java

/*
 *  TermQuery.java
 *
 *  Copyright (c) 2007-2011, The University of Sheffield.
 *
 *  This file is part of GATE Mímir (see http://gate.ac.uk/family/mimir.html), 
 *  and is free software, licenced under the GNU Lesser General Public License,
 *  Version 3, June 2007 (also included with this distribution as file
 *  LICENCE-LGPL3.html).
 *
 *  Valentin Tablan, 03 Mar 2009
 *  
 *  $Id: TermQuery.java 20208 2017-04-19 08:35:28Z domrout $
 */

package gate.mimir.search.query;

import gate.mimir.IndexConfig;
import gate.mimir.index.AtomicIndex;
import gate.mimir.search.IndexReaderPool;
import gate.mimir.search.QueryEngine;
import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
import it.unimi.dsi.fastutil.objects.ReferenceSet;
import it.unimi.dsi.lang.MutableString;
import it.unimi.di.big.mg4j.index.Index;
import it.unimi.di.big.mg4j.index.IndexIterator;
import it.unimi.di.big.mg4j.index.IndexIterators;
import it.unimi.di.big.mg4j.index.IndexReader;
import it.unimi.di.big.mg4j.index.payload.Payload;
import it.unimi.di.big.mg4j.search.DocumentIterator;
import it.unimi.di.big.mg4j.search.IntervalIterator;
import it.unimi.di.big.mg4j.search.visitor.DocumentIteratorVisitor;

import java.io.IOException;

import static gate.mimir.search.QueryEngine.IndexType;

/**
 * A {@link QueryNode} for term queries. A term query consists of an index name 
 * and a query term. 
 */
public class TermQuery implements QueryNode {

  // Serialization version marker (presumably QueryNode is Serializable —
  // TODO confirm against the QueryNode declaration).
  private static final long serialVersionUID = 7302348587893649887L;

  /**
   * The query term, as supplied to the constructor.
   */
  private String term;
  
  /**
   * The term ID for this query. If not known, 
   * {@link DocumentIterator#END_OF_LIST} is used. When a term ID is present, 
   * the executor looks up documents by ID rather than by term string.
   */
  private long termId = DocumentIterator.END_OF_LIST;
  
  
  /**
   * The name of the index to search.
   */
  private String indexName;
  
  /**
   * The type of the index to be searched.
   */
  private IndexType indexType;
  
  /**
   * The length of the matches. The constructors for token indexes set this to
   * <code>1</code>; the other constructors take it as a parameter.
   */
  private int length;
  
  
  /**
   * A {@link QueryExecutor} for {@link TermQuery} nodes.
   * <p>
   * The executor walks the documents and positions of a single term using an
   * MG4J {@link IndexIterator}. It also implements {@link IndexIterator}
   * itself, by delegating to that underlying iterator. The delegating methods
   * dereference the underlying iterator directly, so they throw a
   * {@link NullPointerException} after {@link #close()} has been called, or
   * when the atomic index had no MG4J index at construction time.
   * <p>
   * A <code>latestDocument</code> value of <code>-1</code> marks the search as
   * exhausted.
   */
  public static class TermQueryExecutor extends AbstractQueryExecutor implements IndexIterator{

    /**
     * The {@link TermQuery} node being executed.
     */
    private final TermQuery query;

    /**
     * The {@link AtomicIndex} being searched, as obtained from the
     * {@link QueryEngine} for the query's index name and type.
     */
    private final AtomicIndex atomicIndex;

    /**
     * The {@link IndexReader} obtained from the MG4J index of the
     * {@link #atomicIndex}. Remains <code>null</code> if the atomic index had
     * no MG4J index when this executor was created.
     */
    private IndexReader indexReader;

    /**
     * The index iterator used to obtain hits. This is <code>null</code> if
     * the atomic index had no MG4J index at construction time, and after the
     * executor has been closed.
     */
    private IndexIterator indexIterator;

    /**
     * The positions iterator for the latest document.
     */
    private IntIterator positionsIterator;


    /**
     * Creates an executor for the given term query.
     *
     * @param node the {@link TermQuery} to be executed.
     * @param engine the {@link QueryEngine} that provides the index to search.
     * @throws IOException if the index files cannot be accessed.
     * @throws IllegalArgumentException if the engine has no index for the
     * query's index name.
     */
    public TermQueryExecutor(TermQuery node, QueryEngine engine) throws IOException {
      super(engine, node);
      this.query = node;
      atomicIndex = query.getIndex(engine);

      if(atomicIndex == null) throw new IllegalArgumentException(
              "No index provided for field " + node.getIndexName() + "!");
      Index mg4jIndex = atomicIndex.getIndex();
      if(mg4jIndex != null) {
        indexReader = mg4jIndex.getReader();
        boolean initialised = false;
        try {
          // run the query term through the index's term processor; the result
          // is needed whether we search by term ID or by term string
          MutableString mutableString = new MutableString(query.getTerm());
          mg4jIndex.termProcessor.processTerm(mutableString);
          if(query.termId != DocumentIterator.END_OF_LIST) {
            // if we have the term ID, use that
            this.indexIterator = indexReader.documents(query.termId);
            // set the term (used by rankers)
            this.indexIterator.term(mutableString);
          } else {
            // no term ID: look the processed term up by its string form
            this.indexIterator = indexReader.documents(mutableString.toString());
          }
          initialised = true;
        } finally {
          if(!initialised) {
            // construction is failing: do not leak the reader we just opened
            try {
              indexReader.close();
            } catch(IOException ignored) {
              // the failure that brought us here is the one worth reporting
            }
          }
        }
      } else {
        // the atomic index is empty: we have exhausted the search already
        latestDocument = -1;
      }

      positionsIterator = null;
    }

    /**
     * Moves to the next document that contains the term and has an ID
     * strictly greater than <code>from</code>.
     *
     * @param from the document ID after which to search.
     * @return the ID of the next matching document, or <code>-1</code> if
     * there are no more documents, the executor is closed, or the index is
     * empty.
     * @throws IOException if the index files cannot be accessed.
     * @see gate.mimir.search.query.QueryExecutor#nextDocument()
     */
    public long nextDocument(long from) throws IOException {
      // closed, nothing to iterate over, or already exhausted. This must be
      // checked before touching the MG4J index, which is null when the atomic
      // index is empty.
      if(closed || indexIterator == null || latestDocument == -1) {
        return latestDocument = -1;
      }
      Index mg4jIndex = atomicIndex.getIndex();
      // same test as (from + 1 >= numberOfDocuments), written so that it
      // cannot overflow for very large values of from
      if(mg4jIndex == null || from >= mg4jIndex.numberOfDocuments - 1) {
        return latestDocument = -1;
      }

      if (from >= latestDocument){
        //we do need to skip
        latestDocument = indexIterator.skipTo(from + 1);
      }else{
        //from is lower than latest document, 
        //so we just return the next document
        latestDocument = indexIterator.nextDocument();
      }
      if(latestDocument == DocumentIterator.END_OF_LIST){
        //no more documents available
        latestDocument = -1;
      } else {
        positionsIterator = IndexIterators.positionIterator(indexIterator);
      }
      return latestDocument;
    }

    /**
     * Gets the next hit on the current document.
     *
     * @return a {@link Binding} for the next position of the term in the
     * current document, or <code>null</code> if the executor is closed, there
     * is no current document, or the current document has no more positions.
     * @throws IOException if the index files cannot be accessed.
     * @see gate.mimir.search.query.QueryExecutor#nextHit()
     */
    public Binding nextHit() throws IOException{
      // without a current document there are no positions to read; bail out
      // before building a positions iterator over a missing index iterator
      if(closed || latestDocument < 0 || indexIterator == null) return null;
      if(positionsIterator == null) positionsIterator = 
          IndexIterators.positionIterator(indexIterator);
      if(positionsIterator.hasNext()){
        int position = positionsIterator.nextInt();
        return new Binding(query, latestDocument, position, query.length, null);
      }
      //no more positions on this document
      return null;
    }

    /**
     * Closes this executor and the underlying index reader (if any). Calling
     * this method on an already closed executor has no effect.
     *
     * @throws IOException if closing the index reader fails.
     * @see gate.mimir.search.query.QueryExecutor#close()
     */
    public void close() throws IOException {
      if(closed) return;
      super.close();
      // drop the iterators so they cannot be used after the reader is closed
      indexIterator = null;
      positionsIterator = null;
      if(indexReader != null) indexReader.close();
    }


    /**
     * Not supported: positions are obtained through {@link #nextHit()}.
     *
     * @see it.unimi.di.big.mg4j.index.IndexIterator#nextPosition()
     */
    @Override
    public int nextPosition() throws IOException {
      throw new UnsupportedOperationException("Method not implemented!");
    }

    /** Not supported. */
    public boolean hasNext() {
      throw new UnsupportedOperationException("Method not implemented!");
    }

    /** Not supported. */
    public Integer next() {
      throw new UnsupportedOperationException("Method not implemented!");
    }

    /** Not supported. */
    public void remove() {
      throw new UnsupportedOperationException("Method not implemented!");
    }

    // The methods below implement the rest of the IndexIterator API by
    // forwarding each call, unchanged, to the underlying index iterator.

    public Index index() {
      return indexIterator.index();
    }

    public IntervalIterator intervalIterator() throws IOException {
      return indexIterator.intervalIterator();
    }

    public long frequency() throws IOException {
      return indexIterator.frequency();
    }

    public IntervalIterator intervalIterator(Index index) throws IOException {
      return indexIterator.intervalIterator(index);
    }

    public Payload payload() throws IOException {
      return indexIterator.payload();
    }

    public int count() throws IOException {
      return indexIterator.count();
    }

    public Reference2ReferenceMap<Index, IntervalIterator> intervalIterators()
      throws IOException {
      return indexIterator.intervalIterators();
    }

    public ReferenceSet<Index> indices() {
      return indexIterator.indices();
    }

    public IndexIterator id(int id) {
      return indexIterator.id(id);
    }

    // NOTE: unlike nextDocument(long), this forwards straight to the
    // underlying iterator and does not update latestDocument or the
    // positions iterator.
    public long nextDocument() throws IOException {
      return indexIterator.nextDocument();
    }

    public int id() {
      return indexIterator.id();
    }

    public long document() {
      return indexIterator.document();
    }

    public <T> T accept(DocumentIteratorVisitor<T> visitor) throws IOException {
      return indexIterator.accept(visitor);
    }

    public <T> T acceptOnTruePaths(DocumentIteratorVisitor<T> visitor)
      throws IOException {
      return indexIterator.acceptOnTruePaths(visitor);
    }

    public void dispose() throws IOException {
      indexIterator.dispose();
    }

    public long termNumber() {
      return indexIterator.termNumber();
    }

    public String term() {
      return indexIterator.term();
    }

    public IndexIterator term(CharSequence term) {
      return indexIterator.term(term);
    }

    public IndexIterator weight(double weight) {
      return indexIterator.weight(weight);
    }

    // NOTE: forwards straight to the underlying iterator and does not update
    // latestDocument or the positions iterator.
    public long skipTo(long n) throws IOException {
      return indexIterator.skipTo(n);
    }

    public double weight() {
      return indexIterator.weight();
    }
    
  }
  
  /**
   * Gets the term this query searches for.
   *
   * @return the query term, as supplied to the constructor.
   */
  public CharSequence getTerm() {
    return this.term;
  }
  
  /**
   * Gets the term ID of this query.
   *
   * @return the term ID, or {@link DocumentIterator#END_OF_LIST} if no term
   * ID was supplied when the query was created.
   */
  public long getTermId() {
    return this.termId;
  }

  /**
   * Gets the name of the index searched by this query.
   *
   * @return the index name, as supplied to the constructor.
   */
  public String getIndexName() {
    return this.indexName;
  }
  
  /**
   * Gets the index for this query in a given {@link QueryEngine}.
   * 
   * @param engine the query engine that supplies the indexes.
   * @return whatever the engine returns for this query's index name: its 
   * token index if the index type is {@link IndexType#TOKENS}, or its 
   * annotation index if the index type is {@link IndexType#ANNOTATIONS}.
   * @throws IllegalArgumentException if the index type of this query is 
   * <code>null</code>, or is neither of the two supported types.
   */
  public AtomicIndex getIndex(QueryEngine engine) {
    // a switch on a null enum value fails with a bare NullPointerException, 
    // so a missing index type is routed to the same error as an unsupported one
    if(indexType != null) {
      switch(indexType){
        case TOKENS:
          return engine.getTokenIndex(indexName);
        case ANNOTATIONS:
          return engine.getAnnotationIndex(indexName);
        default:
          break;
      }
    }
    throw new IllegalArgumentException("Indexes of type " + 
            indexType + " are not supported!"); 
  }
  
  
  /**
   * Creates a new term query, for searching over the document text (that is,
   * a token index). The hit length is set to <code>1</code> and no term ID is
   * set.
   * 
   * @param indexName the name of the index to be searched. This should be one
   * of the annotation feature names used for indexing tokens (see 
   * {@link IndexConfig.TokenIndexerConfig}).
   * 
   * @param term the term to be searched for.
   * 
   * @see IndexConfig.TokenIndexerConfig
   */
  public TermQuery(String indexName, String term) {
    this(IndexType.TOKENS, indexName, term, 1);
  }
  
  /**
   * Creates a new term query, for searching over the document text (that is,
   * a token index), using a known term ID. The hit length is set to 
   * <code>1</code>.
   * 
   * @param indexName the name of the index to be searched. This should be one
   * of the annotation feature names used for indexing tokens (see 
   * {@link IndexConfig.TokenIndexerConfig}).
   * 
   * @param term the string form of the term to be searched for.
   * 
   * @param termId the term ID for the term to be searched for.
   * 
   * @see IndexConfig.TokenIndexerConfig
   */
  public TermQuery(String indexName, String term, long termId) {
    this(IndexType.TOKENS, indexName, term, termId, 1);
  }
  
  /**
   * Creates a new term query, for searching over semantic annotations. No 
   * term ID is set.
   *   
   * @param annotationType the type of annotation sought. This should be one 
   * of the annotation types used when indexing semantic annotations (see 
   * {@link IndexConfig.SemanticIndexerConfig}).
   * 
   * @param mentionURI the URI of the mention sought.
   * 
   * @param length the length of the mention sought.
   */
  public TermQuery(String annotationType, String mentionURI, int length) {
    this(IndexType.ANNOTATIONS, annotationType, mentionURI, length);
  }
  
  /**
   * Creates a new term query, for searching over semantic annotations, using
   * a known term ID.
   *   
   * @param annotationType the type of annotation sought. This should be one 
   * of the annotation types used when indexing semantic annotations (see 
   * {@link IndexConfig.SemanticIndexerConfig}).
   * 
   * @param term the string form of the term (the mention URI) sought.
   * 
   * @param mentionTermid the term ID for the mentionURI sought.
   * 
   * @param length the length of the mention sought.
   */
  public TermQuery(String annotationType, String term, long mentionTermid, int length) {
    this(IndexType.ANNOTATIONS, annotationType, term, mentionTermid, length);
  }  
  
  /**
   * Creates a new term query with no known term ID. This constructor is part 
   * of a low-level API. See the other constructors of this class, which may 
   * be more suitable!
   *   
   * @param indexType the type of index to be searched.
   * 
   * @param indexName the name of the index to be searched. For an index type 
   * of {@link IndexType#TOKENS} this is interpreted as the feature name for 
   * the document tokens; for {@link IndexType#ANNOTATIONS} it is interpreted 
   * as the annotation type.
   * 
   * @param term the term to be searched for.
   * 
   * @param length the length of the hits (useful in the case of annotation 
   * indexes, where the length of each mention is stored external to the actual 
   * index).
   */
  public TermQuery(IndexType indexType, String indexName, String term, int length) {
    // END_OF_LIST is the marker for "term ID not known"
    this(indexType, indexName, term, DocumentIterator.END_OF_LIST, length);
  }
  
  /**
   * Creates a new term query with a known term ID. This constructor is part 
   * of a low-level API. See the other constructors of this class, which may 
   * be more suitable!
   *   
   * @param indexType the type of index to be searched.
   * 
   * @param indexName the name of the index to be searched. For an index type 
   * of {@link IndexType#TOKENS} this is interpreted as the feature name for 
   * the document tokens; for {@link IndexType#ANNOTATIONS} it is interpreted 
   * as the annotation type.
   * 
   * @param term the string form of the sought term.
   * 
   * @param termId the term ID for the sought term.
   * 
   * @param length the length of the hits (useful in the case of annotation 
   * indexes, where the length of each mention is stored external to the actual 
   * index).
   */
  public TermQuery(IndexType indexType, String indexName, String term, long termId, int length) {
    // where to search
    this.indexName = indexName;
    this.indexType = indexType;
    // what to search for
    this.term = term;
    this.termId = termId;
    // how long each hit is
    this.length = length;
  }
  
  
  
  /**
   * Gets a new query executor for this {@link TermQuery}.
   * 
   * @param engine the {@link QueryEngine} in which the query is executed.
   * @return an appropriate {@link QueryExecutor} (in this case, a new 
   * instance of {@link TermQueryExecutor}).
   * @throws IOException if the index files cannot be accessed.
   * @throws IllegalArgumentException if the engine does not have an index 
   * for this query's {@link #indexName}.
   * @see gate.mimir.search.query.QueryNode#getQueryExecutor(QueryEngine)
   */
  public QueryExecutor getQueryExecutor(QueryEngine engine) throws IOException {
    QueryExecutor executor = new TermQueryExecutor(this, engine);
    return executor;
  }
  
  /**
   * Builds a textual form of this query, shaped as 
   * <code>TERM(indexName:term)</code>. A <code>null</code> index name is 
   * rendered as the empty string.
   * 
   * @return the string representation of this query.
   */
  @Override
  public String toString() {
    String shownName = indexName;
    if(shownName == null) {
      shownName = "";
    }
    return "TERM(" + shownName + ":" + term + ")";
  }

  /**
   * Gets the type of the index searched by this query.
   * 
   * @return the index type, as supplied to (or implied by) the constructor.
   */
  public IndexType getIndexType() {
    return this.indexType;
  }

  /**
   * Gets the length of the hits produced by this query.
   * 
   * @return the hit length.
   */
  public int getLength() {
    return this.length;
  }
}