AbstractOverlapQuery.java
/*
* AbstractOverlapQuery.java
*
* Copyright (c) 2007-2011, The University of Sheffield.
*
* This file is part of GATE MÃmir (see http://gate.ac.uk/family/mimir.html),
* and is free software, licenced under the GNU Lesser General Public License,
* Version 3, June 2007 (also included with this distribution as file
* LICENCE-LGPL3.html).
*
* Valentin Tablan, 21 Apr 2009
*
* $Id: AbstractOverlapQuery.java 20208 2017-04-19 08:35:28Z domrout $
*/
package gate.mimir.search.query;
import gate.mimir.search.QueryEngine;
import it.unimi.dsi.fastutil.objects.ReferenceArraySet;
import it.unimi.dsi.fastutil.objects.ReferenceSet;
import it.unimi.di.big.mg4j.index.Index;
import it.unimi.di.big.mg4j.search.visitor.DocumentIteratorVisitor;
import java.io.IOException;
import java.util.*;
/**
* Abstract class providing the shared functionality used by both
* {@link WithinQuery} and {@link ContainsQuery}.
*/
public abstract class AbstractOverlapQuery implements QueryNode{
private static final long serialVersionUID = -6787211242951582971L;
/**
* @param innerQuery
* @param outerQuery
*/
public AbstractOverlapQuery(QueryNode innerQuery, QueryNode outerQuery) {
this.innerQuery = innerQuery;
this.outerQuery = outerQuery;
}
public static class OverlapQueryExecutor extends AbstractQueryExecutor{
/**
* Which of the sub-queries is the target?
*/
protected SubQuery targetQuery;
/**
* @param engine
* @param query
* @throws IOException
*/
public OverlapQueryExecutor(AbstractOverlapQuery query, QueryEngine engine,
SubQuery target) throws IOException {
super(engine, query);
this.targetQuery = target;
innerExecutor = query.innerQuery.getQueryExecutor(engine);
outerExecutor = query.outerQuery.getQueryExecutor(engine);
hitsOnCurrentDocument = new ArrayList<Binding>();
}
protected QueryExecutor innerExecutor;
protected QueryExecutor outerExecutor;
/* (non-Javadoc)
* @see gate.mimir.search.query.QueryExecutor#close()
*/
public void close() throws IOException {
if(closed) return;
super.close();
innerExecutor.close();
outerExecutor.close();
}
/* (non-Javadoc)
* @see gate.mimir.search.query.QueryExecutor#nextDocument(int)
*/
public long nextDocument(long greaterThan) throws IOException {
if(closed) return latestDocument = -1;
if(latestDocument == -1) return latestDocument;
hitsOnCurrentDocument.clear();
while(hitsOnCurrentDocument.isEmpty() && latestDocument != -1){
//find a document on which both sub-executors agree
long outerNext = outerExecutor.nextDocument(greaterThan);
long innerNext = innerExecutor.nextDocument(greaterThan);
while(outerNext != innerNext){
if(outerNext == -1 || innerNext == -1){
//one executor has run out -> we're done!
return latestDocument = -1;
}
//advance the smallest one
while(outerNext < innerNext){
outerNext = outerExecutor.nextDocument(innerNext - 1);
if(outerNext == -1) return -1;
}
while(innerNext < outerNext){
innerNext = innerExecutor.nextDocument(outerNext -1);
if(innerNext == -1) return -1;
}
}
//at this point, the next docs are the same
latestDocument = outerNext;
//now check that there are actual hits on the current doc
if(latestDocument != -1){
getHitsOnCurrentDocument();
}
}
return latestDocument;
}
/* (non-Javadoc)
* @see gate.mimir.search.query.QueryExecutor#nextHit()
*/
public Binding nextHit() throws IOException {
if(closed) return null;
return hitsOnCurrentDocument.isEmpty() ? null : hitsOnCurrentDocument.remove(0);
}
protected List<Binding> hitsOnCurrentDocument;
protected void getHitsOnCurrentDocument() throws IOException{
hitsOnCurrentDocument.clear();
//get the hits from each of the sub-executors
List<Binding> outerHits = new LinkedList<Binding>();
Binding aHit = outerExecutor.nextHit();
while(aHit != null){
outerHits.add(aHit);
aHit = outerExecutor.nextHit();
}
List<Binding> innerHits = new LinkedList<Binding>();
aHit = innerExecutor.nextHit();
while(aHit != null){
innerHits.add(aHit);
aHit = innerExecutor.nextHit();
}
//now find the overlaps
outer:for(Binding outerHit : outerHits){
Iterator<Binding> innerIter = innerHits.iterator();
while(innerIter.hasNext()){
Binding innerHit = innerIter.next();
if(innerHit.getTermPosition() < outerHit.getTermPosition()){
//inner not useful any more
innerIter.remove();
}else if((innerHit.getTermPosition() + innerHit.getLength()) <=
(outerHit.getTermPosition() + outerHit.getLength())){
//good hit:
// inner.start not smaller than outer.start &&
// inner.end smaller than outer.end
switch(targetQuery){
case INNER:
hitsOnCurrentDocument.add(buildBinding(innerHit, outerHit));
//hit returned, cannot be used any more
innerIter.remove();
break;
case OUTER:
hitsOnCurrentDocument.add(buildBinding(outerHit,innerHit));
//hit returned, move to next one
continue outer;
}
}else if(innerHit.getTermPosition() >
(outerHit.getTermPosition() + outerHit.getLength())){
//all remaining inners are later than current outer
//move to next outer
continue outer;
}else{
//current inner starts inside current outer, but ends outside
//it may still be useful for other outers
//we just ignore it, and move to the next one
}
}
}
}
/**
* Builds the Binding instance we will return, including populating the
* sub-bindings if enabled at the index level
*
* @param main
* the main Binding that is the actual hit of the query
* @param filter
* the Binding used to filter the query hits
* @return the main Binding with it's sub-bindings filled in as required
*/
private Binding buildBinding(Binding main, Binding filter) {
// if we don't want sub-bindings then just return the main binding
if(!engine.isSubBindingsEnabled()) return main;
Binding[] bindings;
int bindingsCount = 1;
// count the number of sub bindings we need to copy from both the main and
// filter binding instances
if(main.getContainedBindings() != null)
bindingsCount += main.getContainedBindings().length;
if(filter.getContainedBindings() != null)
bindingsCount += filter.getContainedBindings().length;
// create the array to hold all the sub-bindings
bindings = new Binding[bindingsCount];
// copy in any sub-bindings from the main query binding
if(main.getContainedBindings() != null) {
System.arraycopy(main.getContainedBindings(), 0, bindings, 0,
main.getContainedBindings().length);
}
// copy in any sub-bindings from the filter query binding
if(filter.getContainedBindings() != null) {
System.arraycopy(filter.getContainedBindings(), 0, bindings,
bindingsCount - filter.getContainedBindings().length,
filter.getContainedBindings().length);
}
// now we've used the sub-bindings from the filter remove them
filter.setContainedBindings(null);
// we want to store the filter binding itself as well
if(main.getContainedBindings() != null) {
bindings[main.getContainedBindings().length] = filter;
} else {
bindings[0] = filter;
}
// set the contained bindings on the main query binding
main.setContainedBindings(bindings);
// return the binding
return main;
}
public <T> T accept( final DocumentIteratorVisitor<T> visitor ) throws IOException {
if ( ! visitor.visitPre( this ) ) return null;
final T[] a = visitor.newArray( 2 );
if ( a == null ) {
if ( innerExecutor.accept( visitor ) == null ) return null;
if ( outerExecutor.accept( visitor ) == null ) return null;
}
else {
if ( ( a[ 0 ] = innerExecutor.accept( visitor ) ) == null ) return null;
if ( ( a[ 1 ] = outerExecutor.accept( visitor ) ) == null ) return null;
}
return visitor.visitPost( this, a );
}
public <T> T acceptOnTruePaths( final DocumentIteratorVisitor<T> visitor ) throws IOException {
if ( ! visitor.visitPre( this ) ) return null;
final T[] a = visitor.newArray( 1 );
QueryExecutor executor = targetQuery == SubQuery.INNER ?
innerExecutor : outerExecutor;
if ( a == null ) {
if ( executor.acceptOnTruePaths( visitor ) == null ) return null;
}
else {
if ( ( a[ 0 ] = executor.acceptOnTruePaths( visitor ) ) == null ) return null;
}
return visitor.visitPost( this, a );
}
@Override
public ReferenceSet<Index> indices() {
if(indices == null) {
// we keep all indexes (even though only the ones on one branch will be
// needed) so that the visiting doesn't need to care about the orientation.
indices = new ReferenceArraySet<Index>();
indices.addAll(innerExecutor.indices());
indices.addAll(outerExecutor.indices());
}
return indices;
}
protected ReferenceSet<Index> indices;
}
/**
* A simple enum used to identify the two sub-queries.
*/
protected enum SubQuery{
INNER, OUTER
}
/**
* The query providing the inner intervals.
*/
protected QueryNode innerQuery;
/**
* The query providing the outer intervals.
*/
protected QueryNode outerQuery;
public QueryNode getInnerQuery() {
return innerQuery;
}
public QueryNode getOuterQuery() {
return outerQuery;
}
}