OrTermsQuery.java
/*
* OrTermsQuery.java
*
* Copyright (c) 2007-2011, The University of Sheffield.
*
* This file is part of GATE Mímir (see http://gate.ac.uk/family/mimir.html),
* and is free software, licenced under the GNU Lesser General Public License,
* Version 3, June 2007 (also included with this distribution as file
* LICENCE-LGPL3.html).
*
* Valentin Tablan, 18 Jul 2012
*
* $Id: OrTermsQuery.java 16583 2013-03-12 13:07:53Z valyt $
*/
package gate.mimir.search.terms;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import it.unimi.dsi.fastutil.objects.ObjectHeapSemiIndirectPriorityQueue;
/**
 * Terms query that computes the boolean OR (disjunction) of its sub-queries.
 * The constructor sets the counts strategy to
 * {@link AbstractCompoundTermsQuery.CompoundCountsStrategy#FIRST}.
 */
public class OrTermsQuery extends AbstractCompoundTermsQuery {

  /**
   * Serialization ID.
   */
  private static final long serialVersionUID = 3293699315503739659L;

  /**
   * Creates a new OR terms query over the given sub-queries and sets its
   * counts strategy to
   * {@link AbstractCompoundTermsQuery.CompoundCountsStrategy#FIRST}.
   *
   * @param subQueries the term queries that form the disjunction.
   */
  public OrTermsQuery(TermsQuery... subQueries) {
    super(subQueries);
    setCountsStrategy(AbstractCompoundTermsQuery.CompoundCountsStrategy.FIRST);
  }

  /* (non-Javadoc)
   * @see gate.mimir.search.terms.CompoundTermsQuery#combine(gate.mimir.search.terms.TermsResultSet[])
   */
  @Override
  public TermsResultSet combine(TermsResultSet... resSets) {
    // delegate to the static merge, using this query's counts strategy
    return orResultsSets(resSets, countsStrategy);
  }

  /**
   * Merges several {@link TermsResultSet} values into a single one that
   * represents their disjunction.
   * <p>
   * Every input set is first handed to
   * {@link TermsResultSet#sortTermsResultSetByTermString(TermsResultSet)} so
   * that all inputs enumerate their terms in the same order. The inputs are
   * then merged on their term strings: each distinct term string yields one
   * entry in the output, and all occurrences of that term string are consumed
   * from the inputs.
   * <p>
   * Lengths, counts, descriptions and original term strings are only included
   * in the output if <em>every</em> input set provides them (i.e. the
   * corresponding array is non-null in all inputs); otherwise that piece of
   * data is {@code null} (or left unset, for original term strings) in the
   * result. Length, description and original term strings of an output entry
   * are copied from the input set at the head of the merge queue. The count is
   * obtained by passing the counts of all input sets reported by the queue's
   * {@code front} call to
   * {@link AbstractCompoundTermsQuery#computeCompoundCount}.
   *
   * @param resSets the result sets to combine.
   * @param countsStrategy the strategy used for combining counts; if
   * {@code null},
   * {@link AbstractCompoundTermsQuery.CompoundCountsStrategy#FIRST} is used.
   * @return a new result set containing the union of the input terms.
   */
  public static TermsResultSet orResultsSets(TermsResultSet[] resSets,
      AbstractCompoundTermsQuery.CompoundCountsStrategy countsStrategy) {
    final AbstractCompoundTermsQuery.CompoundCountsStrategy strategy =
        countsStrategy != null ? countsStrategy
            : AbstractCompoundTermsQuery.CompoundCountsStrategy.FIRST;

    final int setCount = resSets.length;
    // heads[i] holds the term string that input set i is currently positioned
    // on; the queue orders input set indexes according to these values
    final String[] heads = new String[setCount];
    final ObjectHeapSemiIndirectPriorityQueue<String> pending =
        new ObjectHeapSemiIndirectPriorityQueue<String>(heads);
    // cursor[i] is the position reached so far inside input set i
    final int[] cursor = new int[setCount];

    // an optional piece of data can only be produced if *all* inputs supply
    // it, as we cannot know in advance which input will provide each entry
    boolean haveLengths = true;
    boolean haveCounts = true;
    boolean haveDescriptions = true;
    boolean haveOrigTerms = true;
    for(int setId = 0; setId < setCount; setId++) {
      final TermsResultSet input = resSets[setId];
      // the merge needs all inputs to list their terms in the same order
      TermsResultSet.sortTermsResultSetByTermString(input);
      if(input.termStrings.length > 0) {
        // non-empty input: position it on its first term and queue it
        heads[setId] = input.termStrings[0];
        pending.enqueue(setId);
      }
      haveLengths &= input.termLengths != null;
      haveCounts &= input.termCounts != null;
      haveDescriptions &= input.termDescriptions != null;
      haveOrigTerms &= input.originalTermStrings != null;
    }

    // accumulators for the merged output
    final ObjectArrayList<String> outStrings = new ObjectArrayList<String>();
    final ObjectArrayList<String> outDescriptions =
        haveDescriptions ? new ObjectArrayList<String>() : null;
    final ObjectArrayList<String[][]> outOrigTerms =
        haveOrigTerms ? new ObjectArrayList<String[][]>() : null;
    final IntArrayList outLengths = haveLengths ? new IntArrayList() : null;
    final IntArrayList outCounts = haveCounts ? new IntArrayList() : null;
    // scratch buffer receiving the input set indexes reported by front()
    final int[] tied = new int[setCount];

    while(!pending.isEmpty()) {
      int top = pending.first();
      final TermsResultSet source = resSets[top];
      final int at = cursor[top];
      final String term = source.termStrings[at];

      // emit one output entry, taking the data from the head input set
      outStrings.add(term);
      if(haveLengths) {
        outLengths.add(source.termLengths[at]);
      }
      if(haveDescriptions) {
        outDescriptions.add(source.termDescriptions[at]);
      }
      if(haveOrigTerms) {
        outOrigTerms.add(source.originalTermStrings[at]);
      }
      if(haveCounts) {
        // gather the counts from every input set at the front of the queue;
        // this must happen before any cursor is moved
        final int tieSize = pending.front(tied);
        final int[] tiedCounts = new int[tieSize];
        for(int k = 0; k < tieSize; k++) {
          final int tiedId = tied[k];
          tiedCounts[k] = resSets[tiedId].termCounts[cursor[tiedId]];
        }
        outCounts.add(
            AbstractCompoundTermsQuery.computeCompoundCount(tiedCounts, strategy));
      }

      // move every input past all occurrences of the term just emitted
      while(resSets[top].termStrings[cursor[top]].equals(term)) {
        cursor[top]++;
        if(cursor[top] < resSets[top].termStrings.length) {
          // still has terms: publish the new head and re-order the queue
          heads[top] = resSets[top].termStrings[cursor[top]];
          pending.changed();
        } else {
          // this input is exhausted
          pending.dequeue();
          if(pending.isEmpty()) {
            // nothing left to merge; the outer loop condition ends the merge
            break;
          }
        }
        top = pending.first();
      }
    }

    // package the accumulated data as a result set
    final String[] mergedStrings =
        outStrings.toArray(new String[outStrings.size()]);
    final int[] mergedLengths = haveLengths ? outLengths.toIntArray() : null;
    final int[] mergedCounts = haveCounts ? outCounts.toIntArray() : null;
    final String[] mergedDescriptions = haveDescriptions
        ? outDescriptions.toArray(new String[outDescriptions.size()]) : null;
    final TermsResultSet merged = new TermsResultSet(mergedStrings,
        mergedLengths, mergedCounts, mergedDescriptions);
    if(haveOrigTerms) {
      merged.originalTermStrings =
          outOrigTerms.toArray(new String[outOrigTerms.size()][][]);
    }
    return merged;
  }
}