OrTermsQuery.java

  1. /*
  2.  *  OrTermsQuery.java
  3.  *
  4.  *  Copyright (c) 2007-2011, The University of Sheffield.
  5.  *
  6.  *  This file is part of GATE Mímir (see http://gate.ac.uk/family/mimir.html),
  7.  *  and is free software, licenced under the GNU Lesser General Public License,
  8.  *  Version 3, June 2007 (also included with this distribution as file
  9.  *  LICENCE-LGPL3.html).
  10.  *
  11.  *  Valentin Tablan, 18 Jul 2012
  12.  *
  13.  *  $Id: OrTermsQuery.java 16583 2013-03-12 13:07:53Z valyt $
  14.  */
  15. package gate.mimir.search.terms;

  16. import it.unimi.dsi.fastutil.ints.IntArrayList;
  17. import it.unimi.dsi.fastutil.objects.ObjectArrayList;
  18. import it.unimi.dsi.fastutil.objects.ObjectHeapSemiIndirectPriorityQueue;

  19. /**
  20.  * Boolean OR operator for term queries.
  21.  * The default count strategy used is
  22.  * {@link AbstractCompoundTermsQuery.CompoundCountsStrategy#FIRST}.
  23.  */
  24. public class OrTermsQuery extends AbstractCompoundTermsQuery {
  25.  
  26.   /**
  27.    * Serialization ID.
  28.    */
  29.   private static final long serialVersionUID = 3293699315503739659L;


  30.   /**
  31.    * Constructs a new OR terms query.
  32.    * @param stringsEnabled should terms strings be returned.
  33.    * @param countsEnabled should term counts be returned. Counts are
  34.    * accumulated across all sub-queries: the count for a term is the sum of all
  35.    * counts for the same term in all sub-queries.  
  36.    * @param limit the maximum number of terms to be returned.
  37.    * @param subQueries the term queries that form the disjunction.
  38.    */
  39.   public OrTermsQuery(TermsQuery... subQueries) {
  40.     super(subQueries);
  41.     setCountsStrategy(AbstractCompoundTermsQuery.CompoundCountsStrategy.FIRST);
  42.   }
  43.  
  44.   /* (non-Javadoc)
  45.    * @see gate.mimir.search.terms.CompoundTermsQuery#combine(gate.mimir.search.terms.TermsResultSet[])
  46.    */
  47.   @Override
  48.   public TermsResultSet combine(TermsResultSet... resSets) {
  49.     return orResultsSets(resSets, countsStrategy);
  50.   }

  51.   /**
  52.    * Given a set of {@link TermsResultSet} values, this method combines them
  53.    * into a single {@link TermsResultSet} representing the disjunction of all
  54.    * the provided results sets.
  55.    * @param resSets
  56.    * @return
  57.    */
  58.   public static TermsResultSet orResultsSets(TermsResultSet[] resSets,
  59.       AbstractCompoundTermsQuery.CompoundCountsStrategy countsStrategy) {
  60.     if(countsStrategy == null) countsStrategy = AbstractCompoundTermsQuery.CompoundCountsStrategy.FIRST;
  61.     String[] currentTerm = new String[resSets.length];
  62.     ObjectHeapSemiIndirectPriorityQueue<String> queue =
  63.         new ObjectHeapSemiIndirectPriorityQueue<String>(currentTerm);
  64.     int[] termIndex = new int[resSets.length];
  65.     boolean lengthsAvailable = true;
  66.     boolean descriptionsAvailable = true;
  67.     boolean origTermsAvailable = true;
  68.     boolean countsAvailable = true;
  69.     for(int i = 0; i < resSets.length; i++) {
  70.       // this implementation requires that all sub-queries return terms in a
  71.       // consistent order, so we sort them lexicographically by termString
  72.       TermsResultSet.sortTermsResultSetByTermString(resSets[i]);
  73.       if(resSets[i].termStrings.length > 0){
  74.         termIndex[i] = 0;
  75.         currentTerm[i] = resSets[i].termStrings[termIndex[i]];
  76.         queue.enqueue(i);
  77.       }
  78.       // we need *all* sub-queries to provide lengths, because we don't know
  79.       // which one will provide any of the results.
  80.       if(resSets[i].termLengths == null) lengthsAvailable = false;
  81.       if(resSets[i].termCounts == null) countsAvailable = false;
  82.       if(resSets[i].termDescriptions == null) descriptionsAvailable = false;
  83.       if(resSets[i].originalTermStrings == null) origTermsAvailable = false;
  84.     }
  85.    
  86.     // prepare local data
  87.     ObjectArrayList<String> termStrings = new ObjectArrayList<String>();
  88.     ObjectArrayList<String> termDescriptions = descriptionsAvailable ?
  89.         new ObjectArrayList<String>() : null;
  90.     ObjectArrayList<String[][]> origTerms = origTermsAvailable ?
  91.           new ObjectArrayList<String[][]>() : null;        
  92.     IntArrayList termLengths = lengthsAvailable ? new IntArrayList() : null;
  93.     IntArrayList termCounts = countsAvailable ? new IntArrayList() : null;
  94.     int front[] = new int[resSets.length];
  95.     // enumerate all terms
  96.     top:while(!queue.isEmpty()) {
  97.       int first = queue.first();
  98.       String termString = resSets[first].termStrings[termIndex[first]];
  99.       termStrings.add(termString);
  100.       if(lengthsAvailable) {
  101.         termLengths.add(resSets[first].termLengths[termIndex[first]]);
  102.       }
  103.       if(descriptionsAvailable) {
  104.         termDescriptions.add(resSets[first].termDescriptions[termIndex[first]]);
  105.       }
  106.       if(origTermsAvailable) {
  107.         origTerms.add(resSets[first].originalTermStrings[termIndex[first]]);
  108.       }
  109.       if(countsAvailable) {
  110.         // sum all counts
  111.         int frontSize = queue.front(front);
  112.         int[] counts = new int[frontSize];
  113.         for(int i = 0;  i < frontSize; i++) {
  114.           int subRunnerId = front[i];
  115.           counts[i]= resSets[subRunnerId].termCounts[termIndex[subRunnerId]];
  116.         }
  117.         termCounts.add(AbstractCompoundTermsQuery.computeCompoundCount(counts, countsStrategy));
  118.       }
  119.       // consume all equal terms
  120.       while(resSets[first].termStrings[termIndex[first]].equals(termString)) {
  121.         // advance this subRunner
  122.         termIndex[first]++;
  123.         if(termIndex[first] == resSets[first].termStrings.length) {
  124.           // 'first' is out
  125.           queue.dequeue();
  126.           if(queue.isEmpty()) break top;
  127.         } else {
  128.           currentTerm[first] = resSets[first].termStrings[termIndex[first]];
  129.           queue.changed();
  130.         }
  131.         first = queue.first();
  132.       }
  133.     }
  134.     // construct the result
  135.     TermsResultSet res = new TermsResultSet(
  136.         termStrings.toArray(new String[termStrings.size()]),
  137.         lengthsAvailable ? termLengths.toIntArray() : null,
  138.         countsAvailable ? termCounts.toIntArray() : null,
  139.         descriptionsAvailable ?
  140.           termDescriptions.toArray(new String[termDescriptions.size()]) : null);
  141.     if(origTermsAvailable){
  142.       res.originalTermStrings = origTerms.toArray(
  143.         new String[origTerms.size()][][]);
  144.     }
  145.     return res;    
  146.   }
  147. }