@CreoleResource(name="GATE Serial Corpus", isPrivate=true, comment="GATE persistent corpus (serialisation)", icon="corpus", helpURL="http://gate.ac.uk/userguide/sec:developer:datastores") public class SerialCorpusImpl extends AbstractLanguageResource implements Corpus, CreoleListener, DatastoreListener, IndexedCorpus, CustomDuplication
| Modifier and Type | Field and Description |
|---|---|
protected List<Document> |
addedDocs |
protected List<Document> |
changedDocs |
protected Vector<CorpusListener> |
corpusListeners |
protected List<DocumentData> |
docDataList |
protected List<Document> |
documents |
protected IndexManager |
indexManager |
protected List<String> |
removedDocIDs |
dataStore, lrPersistentId
name
features
CORPUS_DOCLIST_PARAMETER_NAME, CORPUS_NAME_PARAMETER_NAME
| Modifier | Constructor and Description |
|---|---|
|
SerialCorpusImpl() |
protected |
SerialCorpusImpl(Corpus tCorpus)
Constructor to create a SerialCorpus from a transient one.
|
| Modifier and Type | Method and Description |
|---|---|
boolean |
add(Document o) |
void |
add(int index,
Document o) |
boolean |
addAll(Collection<? extends Document> c) |
boolean |
addAll(int index,
Collection<? extends Document> c) |
void |
addCorpusListener(CorpusListener l)
Registers a new
CorpusListener with this corpus. |
void |
cleanup()
Every LR that is a CreoleListener (and other Listeners too) must
override this method and make sure it removes itself from the
objects which it has been listening to.
|
void |
clear() |
boolean |
contains(Object o) |
boolean |
containsAll(Collection<?> c) |
void |
datastoreClosed(CreoleEvent e)
Called when a
DataStore has been closed |
void |
datastoreCreated(CreoleEvent e)
Called when a
DataStore has been created |
void |
datastoreOpened(CreoleEvent e)
Called when a
DataStore has been opened |
Resource |
duplicate(Factory.DuplicationContext ctx)
SerialCorpusImpl does not support duplication.
|
boolean |
equals(Object o) |
int |
findDocument(Document doc) |
protected void |
fireDocumentAdded(CorpusEvent e) |
protected void |
fireDocumentRemoved(CorpusEvent e) |
Document |
get(int index) |
String |
getDocumentClassType(int index) |
List<String> |
getDocumentClassTypes()
Gets the class types of the documents in this corpus.
|
String |
getDocumentName(int index)
Gets the name of a document in this corpus.
|
List<String> |
getDocumentNames()
Gets the names of the documents in this corpus.
|
Object |
getDocumentPersistentID(int index)
Gets the persistent ID of a document in this corpus.
|
List<Object> |
getDocumentPersistentIDs()
Gets the persistent IDs of the documents in this corpus.
|
IndexDefinition |
getIndexDefinition() |
IndexManager |
getIndexManager() |
IndexStatistics |
getIndexStatistics() |
Object |
getTransientSource() |
int |
hashCode() |
int |
indexOf(Object o) |
Resource |
init()
Initialise this resource, and return it.
|
boolean |
isDocumentLoaded(int index)
This method returns true when the document is already loaded in
memory
|
boolean |
isEmpty() |
boolean |
isPersistentDocument(int index)
This method returns true when the document is already stored on
disk, i.e., is not transient
|
Iterator<Document> |
iterator() |
int |
lastIndexOf(Object o) |
ListIterator<Document> |
listIterator() |
ListIterator<Document> |
listIterator(int index) |
void |
populate(URL directory,
FileFilter filter,
String encoding,
boolean recurseDirectories)
Fills this corpus with documents created from files in a directory.
|
void |
populate(URL directory,
FileFilter filter,
String encoding,
String mimeType,
boolean recurseDirectories)
Fills this corpus with documents created from files in a directory.
|
long |
populate(URL singleConcatenatedFile,
String documentRootElement,
String encoding,
int numberOfFilesToExtract,
String documentNamePrefix,
String mimeType,
boolean includeRootElement)
Fills the provided corpus with documents extracted from the
provided single concatenated file.
|
Document |
remove(int index) |
boolean |
remove(Object o) |
boolean |
removeAll(Collection<?> c) |
void |
removeCorpusListener(CorpusListener l)
Removes one of the listeners registered with this corpus.
|
void |
resourceAdopted(DatastoreEvent evt)
Called by a datastore when a new resource has been adopted
|
void |
resourceDeleted(DatastoreEvent evt)
Called by a datastore when a resource has been deleted
|
void |
resourceLoaded(CreoleEvent e)
Called when a new
Resource has been loaded into the system |
void |
resourceRenamed(Resource resource,
String oldName,
String newName)
Called when the creole register has renamed a resource.
|
void |
resourceUnloaded(CreoleEvent e)
Called when a
Resource has been removed from the system |
void |
resourceWritten(DatastoreEvent evt)
Called by a datastore when a resource has been written into the
datastore
|
boolean |
retainAll(Collection<?> c) |
Document |
set(int index,
Document element) |
void |
setDataStore(DataStore dataStore)
Set the data store that this LR lives in.
|
void |
setDocumentPersistentID(int index,
Object persID)
This method should only be used by the Serial Datastore to set the persistent ID of a document.
|
void |
setIndexDefinition(IndexDefinition definition)
Sets the index definition for this corpus.
|
void |
setTransientSource(Object source) |
int |
size() |
List<Document> |
subList(int fromIndex,
int toIndex)
A persistent Corpus does not support this method, as all the documents
might not be in memory
|
Object[] |
toArray() |
<T> T[] |
toArray(T[] a) |
String |
toString() |
void |
unloadDocument(Document doc)
Unloads a document from memory, calling sync() first, to store the
changes.
|
void |
unloadDocument(Document doc,
boolean sync)
Unloads a document from memory
|
void |
unloadDocument(int index)
Unloads the document from memory, calling sync() first, to store
the changes.
|
void |
unloadDocument(int index,
boolean sync)
Unloads a document from memory.
|
getDataStore, getLRPersistenceId, getParent, isModified, setLRPersistenceId, setParent, synccheckParameterValues, flushBeanInfoCache, forgetBeanInfo, getBeanInfo, getInitParameterValues, getInitParameterValues, getName, getParameterValue, getParameterValue, getParameterValues, removeResourceListeners, setName, setParameterValue, setParameterValue, setParameterValues, setParameterValues, setResourceListenersgetFeatures, setFeaturesclone, finalize, getClass, notify, notifyAll, wait, wait, waitgetDataStore, getLRPersistenceId, getParent, isModified, setLRPersistenceId, setParent, syncgetParameterValue, setParameterValue, setParameterValuesgetFeatures, setFeaturesreplaceAll, sort, spliteratorparallelStream, removeIf, streamprotected transient Vector<CorpusListener> corpusListeners
protected List<DocumentData> docDataList
protected transient IndexManager indexManager
public SerialCorpusImpl()
protected SerialCorpusImpl(Corpus tCorpus)
public List<String> getDocumentNames()
getDocumentNames in interface SimpleCorpusList of Strings representing the names of the
documents in this corpus.public List<Object> getDocumentPersistentIDs()
List of Objects representing the persistent IDs
of the documents in this corpus.public List<String> getDocumentClassTypes()
List of Strings representing the class types
of the documents in this corpus.public void setDocumentPersistentID(int index,
Object persID)
public String getDocumentName(int index)
getDocumentName in interface SimpleCorpusindex - the index of the documentpublic Object getDocumentPersistentID(int index)
index - the index of the documentpublic String getDocumentClassType(int index)
public void unloadDocument(int index,
boolean sync)
index - the index of the document to be unloaded.sync - should the document be sync'ed (i.e. saved) before
unloading.public void unloadDocument(Document doc, boolean sync)
doc - the document to be unloadedsync - should the document be sync'ed (i.e. saved) before
unloading.public void unloadDocument(Document doc)
unloadDocument in interface Corpusdoc - the document to be unloaded.public void unloadDocument(int index)
index - the index of the document to be unloaded.public boolean isDocumentLoaded(int index)
isDocumentLoaded in interface Corpuspublic boolean isPersistentDocument(int index)
public void cleanup()
cleanup in interface Resourcecleanup in class AbstractLanguageResourcepublic void populate(URL directory, FileFilter filter, String encoding, boolean recurseDirectories) throws IOException, ResourceInstantiationException
populate in interface SimpleCorpusfilter - the file filter used to select files from the target
directory. If the filter is null all the files
will be accepted.directory - the directory from which the files will be picked.
This parameter is an URL for uniformity. It needs to be a
URL of type file otherwise an InvalidArgumentException
will be thrown. An implementation for this method is
provided as a static method at
CorpusImpl.populate(Corpus, URL, FileFilter, String, boolean)
.encoding - the encoding to be used for reading the documentsrecurseDirectories - should the directory be parsed
recursively? If true, all the files from the
provided directory and all its children directories (on as
many levels as necessary) will be picked if accepted by
the filter otherwise the children directories will be
ignored.IOExceptionResourceInstantiationExceptionpublic void populate(URL directory, FileFilter filter, String encoding, String mimeType, boolean recurseDirectories) throws IOException, ResourceInstantiationException
populate in interface SimpleCorpusfilter - the file filter used to select files from the target
directory. If the filter is null all the files
will be accepted.directory - the directory from which the files will be picked.
This parameter is an URL for uniformity. It needs to be a
URL of type file otherwise an InvalidArgumentException
will be thrown. An implementation for this method is
provided as a static method at
CorpusImpl.populate(Corpus, URL, FileFilter, String, boolean)
.encoding - the encoding to be used for reading the documentsrecurseDirectories - should the directory be parsed
recursively? If true, all the files from the
provided directory and all its children directories (on as
many levels as necessary) will be picked if accepted by
the filter otherwise the children directories will be
ignored.mimeType - the mime type to be used when loading documents. If
null, then the mime type will be automatically determined.IOExceptionResourceInstantiationExceptionpublic long populate(URL singleConcatenatedFile, String documentRootElement, String encoding, int numberOfFilesToExtract, String documentNamePrefix, String mimeType, boolean includeRootElement) throws IOException, ResourceInstantiationException
populate in interface SimpleCorpussingleConcatenatedFile - the single concatenated file.documentRootElement - content between the start and end of
this element is considered for documents.encoding - the encoding of the trec file.numberOfFilesToExtract - indicates the number of files to
extract from the trecweb file.documentNamePrefix - the prefix to use for document names when
creating documents from the concatenated file.mimeType - the mime type which determines how the document is handledIOExceptionResourceInstantiationExceptionpublic void removeCorpusListener(CorpusListener l)
CorpusremoveCorpusListener in interface Corpusl - the listener to be removed.public void addCorpusListener(CorpusListener l)
CorpusCorpusListener with this corpus.addCorpusListener in interface Corpusl - the listener to be added.protected void fireDocumentAdded(CorpusEvent e)
protected void fireDocumentRemoved(CorpusEvent e)
public void resourceLoaded(CreoleEvent e)
CreoleListenerResource has been loaded into the systemresourceLoaded in interface CreoleListenerpublic void resourceRenamed(Resource resource, String oldName, String newName)
CreoleListenerresourceRenamed in interface CreoleListenerpublic void resourceUnloaded(CreoleEvent e)
CreoleListenerResource has been removed from the systemresourceUnloaded in interface CreoleListenerpublic void datastoreOpened(CreoleEvent e)
CreoleListenerDataStore has been openeddatastoreOpened in interface CreoleListenerpublic void datastoreCreated(CreoleEvent e)
CreoleListenerDataStore has been createddatastoreCreated in interface CreoleListenerpublic void datastoreClosed(CreoleEvent e)
CreoleListenerDataStore has been closeddatastoreClosed in interface CreoleListenerpublic void resourceAdopted(DatastoreEvent evt)
resourceAdopted in interface DatastoreListenerpublic void resourceDeleted(DatastoreEvent evt)
resourceDeleted in interface DatastoreListenerpublic void resourceWritten(DatastoreEvent evt)
resourceWritten in interface DatastoreListenerpublic int size()
public boolean isEmpty()
public boolean contains(Object o)
public String toString()
toString in class AbstractResourcepublic Object[] toArray()
public <T> T[] toArray(T[] a)
public boolean add(Document o)
public boolean remove(Object o)
public int findDocument(Document doc)
public boolean containsAll(Collection<?> c)
containsAll in interface Collection<Document>containsAll in interface List<Document>public boolean addAll(Collection<? extends Document> c)
public boolean addAll(int index,
Collection<? extends Document> c)
public boolean removeAll(Collection<?> c)
public boolean retainAll(Collection<?> c)
public void clear()
public boolean equals(Object o)
public int hashCode()
public int lastIndexOf(Object o)
lastIndexOf in interface List<Document>public ListIterator<Document> listIterator()
listIterator in interface List<Document>public ListIterator<Document> listIterator(int index)
listIterator in interface List<Document>public List<Document> subList(int fromIndex, int toIndex)
public void setDataStore(DataStore dataStore) throws PersistenceException
AbstractLanguageResourcesetDataStore in interface LanguageResourcesetDataStore in class AbstractLanguageResourcePersistenceExceptionpublic void setTransientSource(Object source)
public Object getTransientSource()
public Resource init() throws ResourceInstantiationException
AbstractResourceinit in interface Resourceinit in class AbstractResourceResourceInstantiationExceptionpublic void setIndexDefinition(IndexDefinition definition)
IndexedCorpussetIndexDefinition in interface IndexedCorpusdefinition - of index for this corpuspublic IndexDefinition getIndexDefinition()
getIndexDefinition in interface IndexedCorpuspublic IndexManager getIndexManager()
getIndexManager in interface IndexedCorpuspublic IndexStatistics getIndexStatistics()
getIndexStatistics in interface IndexedCorpuspublic Resource duplicate(Factory.DuplicationContext ctx) throws ResourceInstantiationException
duplicate in interface CustomDuplicationctx - the current duplication context.
If an implementation of this method needs to duplicate any
other resources as part of the custom duplication process
it should pass this context back to the two-argument form of
Factory.duplicate
rather than using the single-argument form.ResourceInstantiationExceptionCopyright © 2024 GATE. All rights reserved.