OrTermsQuery.java
- /*
- * OrTermsQuery.java
- *
- * Copyright (c) 2007-2011, The University of Sheffield.
- *
- * This file is part of GATE Mímir (see http://gate.ac.uk/family/mimir.html),
- * and is free software, licenced under the GNU Lesser General Public License,
- * Version 3, June 2007 (also included with this distribution as file
- * LICENCE-LGPL3.html).
- *
- * Valentin Tablan, 18 Jul 2012
- *
- * $Id: OrTermsQuery.java 16583 2013-03-12 13:07:53Z valyt $
- */
- package gate.mimir.search.terms;
- import it.unimi.dsi.fastutil.ints.IntArrayList;
- import it.unimi.dsi.fastutil.objects.ObjectArrayList;
- import it.unimi.dsi.fastutil.objects.ObjectHeapSemiIndirectPriorityQueue;
- /**
- * Boolean OR operator for term queries.
- * The default count strategy used is
- * {@link AbstractCompoundTermsQuery.CompoundCountsStrategy#FIRST}.
- */
- public class OrTermsQuery extends AbstractCompoundTermsQuery {
-
- /**
- * Serialization ID.
- */
- private static final long serialVersionUID = 3293699315503739659L;
- /**
- * Constructs a new OR terms query.
- * @param stringsEnabled should terms strings be returned.
- * @param countsEnabled should term counts be returned. Counts are
- * accumulated across all sub-queries: the count for a term is the sum of all
- * counts for the same term in all sub-queries.
- * @param limit the maximum number of terms to be returned.
- * @param subQueries the term queries that form the disjunction.
- */
- public OrTermsQuery(TermsQuery... subQueries) {
- super(subQueries);
- setCountsStrategy(AbstractCompoundTermsQuery.CompoundCountsStrategy.FIRST);
- }
-
- /* (non-Javadoc)
- * @see gate.mimir.search.terms.CompoundTermsQuery#combine(gate.mimir.search.terms.TermsResultSet[])
- */
- @Override
- public TermsResultSet combine(TermsResultSet... resSets) {
- return orResultsSets(resSets, countsStrategy);
- }
- /**
- * Given a set of {@link TermsResultSet} values, this method combines them
- * into a single {@link TermsResultSet} representing the disjunction of all
- * the provided results sets.
- * @param resSets
- * @return
- */
- public static TermsResultSet orResultsSets(TermsResultSet[] resSets,
- AbstractCompoundTermsQuery.CompoundCountsStrategy countsStrategy) {
- if(countsStrategy == null) countsStrategy = AbstractCompoundTermsQuery.CompoundCountsStrategy.FIRST;
- String[] currentTerm = new String[resSets.length];
- ObjectHeapSemiIndirectPriorityQueue<String> queue =
- new ObjectHeapSemiIndirectPriorityQueue<String>(currentTerm);
- int[] termIndex = new int[resSets.length];
- boolean lengthsAvailable = true;
- boolean descriptionsAvailable = true;
- boolean origTermsAvailable = true;
- boolean countsAvailable = true;
- for(int i = 0; i < resSets.length; i++) {
- // this implementation requires that all sub-queries return terms in a
- // consistent order, so we sort them lexicographically by termString
- TermsResultSet.sortTermsResultSetByTermString(resSets[i]);
- if(resSets[i].termStrings.length > 0){
- termIndex[i] = 0;
- currentTerm[i] = resSets[i].termStrings[termIndex[i]];
- queue.enqueue(i);
- }
- // we need *all* sub-queries to provide lengths, because we don't know
- // which one will provide any of the results.
- if(resSets[i].termLengths == null) lengthsAvailable = false;
- if(resSets[i].termCounts == null) countsAvailable = false;
- if(resSets[i].termDescriptions == null) descriptionsAvailable = false;
- if(resSets[i].originalTermStrings == null) origTermsAvailable = false;
- }
-
- // prepare local data
- ObjectArrayList<String> termStrings = new ObjectArrayList<String>();
- ObjectArrayList<String> termDescriptions = descriptionsAvailable ?
- new ObjectArrayList<String>() : null;
- ObjectArrayList<String[][]> origTerms = origTermsAvailable ?
- new ObjectArrayList<String[][]>() : null;
- IntArrayList termLengths = lengthsAvailable ? new IntArrayList() : null;
- IntArrayList termCounts = countsAvailable ? new IntArrayList() : null;
- int front[] = new int[resSets.length];
- // enumerate all terms
- top:while(!queue.isEmpty()) {
- int first = queue.first();
- String termString = resSets[first].termStrings[termIndex[first]];
- termStrings.add(termString);
- if(lengthsAvailable) {
- termLengths.add(resSets[first].termLengths[termIndex[first]]);
- }
- if(descriptionsAvailable) {
- termDescriptions.add(resSets[first].termDescriptions[termIndex[first]]);
- }
- if(origTermsAvailable) {
- origTerms.add(resSets[first].originalTermStrings[termIndex[first]]);
- }
- if(countsAvailable) {
- // sum all counts
- int frontSize = queue.front(front);
- int[] counts = new int[frontSize];
- for(int i = 0; i < frontSize; i++) {
- int subRunnerId = front[i];
- counts[i]= resSets[subRunnerId].termCounts[termIndex[subRunnerId]];
- }
- termCounts.add(AbstractCompoundTermsQuery.computeCompoundCount(counts, countsStrategy));
- }
- // consume all equal terms
- while(resSets[first].termStrings[termIndex[first]].equals(termString)) {
- // advance this subRunner
- termIndex[first]++;
- if(termIndex[first] == resSets[first].termStrings.length) {
- // 'first' is out
- queue.dequeue();
- if(queue.isEmpty()) break top;
- } else {
- currentTerm[first] = resSets[first].termStrings[termIndex[first]];
- queue.changed();
- }
- first = queue.first();
- }
- }
- // construct the result
- TermsResultSet res = new TermsResultSet(
- termStrings.toArray(new String[termStrings.size()]),
- lengthsAvailable ? termLengths.toIntArray() : null,
- countsAvailable ? termCounts.toIntArray() : null,
- descriptionsAvailable ?
- termDescriptions.toArray(new String[termDescriptions.size()]) : null);
- if(origTermsAvailable){
- res.originalTermStrings = origTerms.toArray(
- new String[origTerms.size()][][]);
- }
- return res;
- }
- }