@CreoleResource(name="GATE Serial Corpus", isPrivate=true, comment="GATE persistent corpus (serialisation)", icon="corpus", helpURL="http://gate.ac.uk/userguide/sec:developer:datastores") public class SerialCorpusImpl extends AbstractLanguageResource implements Corpus, CreoleListener, DatastoreListener, IndexedCorpus, CustomDuplication
Modifier and Type | Field and Description |
---|---|
protected List<Document> |
addedDocs |
protected List<Document> |
changedDocs |
protected Vector<CorpusListener> |
corpusListeners |
protected List<DocumentData> |
docDataList |
protected List<Document> |
documents |
protected IndexManager |
indexManager |
protected List<String> |
removedDocIDs |
dataStore, lrPersistentId
name
features
CORPUS_DOCLIST_PARAMETER_NAME, CORPUS_NAME_PARAMETER_NAME
Modifier | Constructor and Description |
---|---|
|
SerialCorpusImpl() |
protected |
SerialCorpusImpl(Corpus tCorpus)
Constructor to create a SerialCorpus from a transient one.
|
Modifier and Type | Method and Description |
---|---|
boolean |
add(Document o) |
void |
add(int index,
Document o) |
boolean |
addAll(Collection<? extends Document> c) |
boolean |
addAll(int index,
Collection<? extends Document> c) |
void |
addCorpusListener(CorpusListener l)
Registers a new
CorpusListener with this corpus. |
void |
cleanup()
Every LR that is a CreoleListener (and other Listeners too) must
override this method and make sure it removes itself from the
objects which it has been listening to.
|
void |
clear() |
boolean |
contains(Object o) |
boolean |
containsAll(Collection<?> c) |
void |
datastoreClosed(CreoleEvent e)
Called when a
DataStore has been closed |
void |
datastoreCreated(CreoleEvent e)
Called when a
DataStore has been created |
void |
datastoreOpened(CreoleEvent e)
Called when a
DataStore has been opened |
Resource |
duplicate(Factory.DuplicationContext ctx)
SerialCorpusImpl does not support duplication.
|
boolean |
equals(Object o) |
int |
findDocument(Document doc) |
protected void |
fireDocumentAdded(CorpusEvent e) |
protected void |
fireDocumentRemoved(CorpusEvent e) |
Document |
get(int index) |
String |
getDocumentClassType(int index) |
List<String> |
getDocumentClassTypes()
Gets the class types of the documents in this corpus.
|
String |
getDocumentName(int index)
Gets the name of a document in this corpus.
|
List<String> |
getDocumentNames()
Gets the names of the documents in this corpus.
|
Object |
getDocumentPersistentID(int index)
Gets the persistent ID of a document in this corpus.
|
List<Object> |
getDocumentPersistentIDs()
Gets the persistent IDs of the documents in this corpus.
|
IndexDefinition |
getIndexDefinition() |
IndexManager |
getIndexManager() |
IndexStatistics |
getIndexStatistics() |
Object |
getTransientSource() |
int |
hashCode() |
int |
indexOf(Object o) |
Resource |
init()
Initialise this resource, and return it.
|
boolean |
isDocumentLoaded(int index)
This method returns true when the document is already loaded in
memory
|
boolean |
isEmpty() |
boolean |
isPersistentDocument(int index)
This method returns true when the document is already stored on
disk i.e., is not transient
|
Iterator<Document> |
iterator() |
int |
lastIndexOf(Object o) |
ListIterator<Document> |
listIterator() |
ListIterator<Document> |
listIterator(int index) |
void |
populate(URL directory,
FileFilter filter,
String encoding,
boolean recurseDirectories)
Fills this corpus with documents created from files in a directory.
|
void |
populate(URL directory,
FileFilter filter,
String encoding,
String mimeType,
boolean recurseDirectories)
Fills this corpus with documents created from files in a directory.
|
long |
populate(URL singleConcatenatedFile,
String documentRootElement,
String encoding,
int numberOfFilesToExtract,
String documentNamePrefix,
String mimeType,
boolean includeRootElement)
Fills the provided corpus with documents extracted from the
provided single concatenated file.
|
Document |
remove(int index) |
boolean |
remove(Object o) |
boolean |
removeAll(Collection<?> c) |
void |
removeCorpusListener(CorpusListener l)
Removes one of the listeners registered with this corpus.
|
void |
resourceAdopted(DatastoreEvent evt)
Called by a datastore when a new resource has been adopted
|
void |
resourceDeleted(DatastoreEvent evt)
Called by a datastore when a resource has been deleted
|
void |
resourceLoaded(CreoleEvent e)
Called when a new
Resource has been loaded into the system |
void |
resourceRenamed(Resource resource,
String oldName,
String newName)
Called when the creole register has renamed a resource.
|
void |
resourceUnloaded(CreoleEvent e)
Called when a
Resource has been removed from the system |
void |
resourceWritten(DatastoreEvent evt)
Called by a datastore when a resource has been written to the
datastore
|
boolean |
retainAll(Collection<?> c) |
Document |
set(int index,
Document element) |
void |
setDataStore(DataStore dataStore)
Set the data store that this LR lives in.
|
void |
setDocumentPersistentID(int index,
Object persID)
This method should only be used by the Serial Datastore to set the persistent ID of the document at the given index.
|
void |
setIndexDefinition(IndexDefinition definition)
Sets the definition to this corpus.
|
void |
setTransientSource(Object source) |
int |
size() |
List<Document> |
subList(int fromIndex,
int toIndex)
Persistent Corpus does not support this method, as all the documents
might not be in memory
|
Object[] |
toArray() |
<T> T[] |
toArray(T[] a) |
String |
toString() |
void |
unloadDocument(Document doc)
Unloads a document from memory, calling sync() first, to store the
changes.
|
void |
unloadDocument(Document doc,
boolean sync)
Unloads a document from memory
|
void |
unloadDocument(int index)
Unloads the document from memory, calling sync() first, to store
the changes.
|
void |
unloadDocument(int index,
boolean sync)
Unloads a document from memory.
|
getDataStore, getLRPersistenceId, getParent, isModified, setLRPersistenceId, setParent, sync
checkParameterValues, flushBeanInfoCache, forgetBeanInfo, getBeanInfo, getInitParameterValues, getInitParameterValues, getName, getParameterValue, getParameterValue, getParameterValues, removeResourceListeners, setName, setParameterValue, setParameterValue, setParameterValues, setParameterValues, setResourceListeners
getFeatures, setFeatures
clone, finalize, getClass, notify, notifyAll, wait, wait, wait
getDataStore, getLRPersistenceId, getParent, isModified, setLRPersistenceId, setParent, sync
getParameterValue, setParameterValue, setParameterValues
getFeatures, setFeatures
replaceAll, sort, spliterator
parallelStream, removeIf, stream
protected transient Vector<CorpusListener> corpusListeners
protected List<DocumentData> docDataList
protected transient IndexManager indexManager
public SerialCorpusImpl()
protected SerialCorpusImpl(Corpus tCorpus)
public List<String> getDocumentNames()
getDocumentNames
in interface SimpleCorpus
List
of Strings representing the names of the
documents in this corpus.public List<Object> getDocumentPersistentIDs()
List
of Objects representing the persistent IDs
of the documents in this corpus.public List<String> getDocumentClassTypes()
List
of Strings representing the class types
of the documents in this corpus.
public void setDocumentPersistentID(int index, Object persID)
public String getDocumentName(int index)
getDocumentName
in interface SimpleCorpus
index
- the index of the documentpublic Object getDocumentPersistentID(int index)
index
- the index of the documentpublic String getDocumentClassType(int index)
public void unloadDocument(int index, boolean sync)
index
- the index of the document to be unloaded.sync
- should the document be sync'ed (i.e. saved) before
unloading.public void unloadDocument(Document doc, boolean sync)
doc
- the document to be unloadedsync
- should the document be sync'ed (i.e. saved) before
unloading.public void unloadDocument(Document doc)
unloadDocument
in interface Corpus
doc
- the document to be unloaded.public void unloadDocument(int index)
index
- the index of the document to be unloaded.public boolean isDocumentLoaded(int index)
isDocumentLoaded
in interface Corpus
public boolean isPersistentDocument(int index)
public void cleanup()
cleanup
in interface Resource
cleanup
in class AbstractLanguageResource
public void populate(URL directory, FileFilter filter, String encoding, boolean recurseDirectories) throws IOException, ResourceInstantiationException
populate
in interface SimpleCorpus
filter
- the file filter used to select files from the target
directory. If the filter is null all the files
will be accepted.directory
- the directory from which the files will be picked.
This parameter is a URL for uniformity. It needs to be a
URL of type file otherwise an InvalidArgumentException
will be thrown. An implementation for this method is
provided as a static method at
CorpusImpl.populate(Corpus, URL, FileFilter, String, boolean)
.encoding
- the encoding to be used for reading the documentsrecurseDirectories
- should the directory be parsed
recursively? If true, all the files from the
provided directory and all its children directories (on as
many levels as necessary) will be picked if accepted by
the filter otherwise the children directories will be
ignored.IOException
ResourceInstantiationException
public void populate(URL directory, FileFilter filter, String encoding, String mimeType, boolean recurseDirectories) throws IOException, ResourceInstantiationException
populate
in interface SimpleCorpus
filter
- the file filter used to select files from the target
directory. If the filter is null all the files
will be accepted.directory
- the directory from which the files will be picked.
This parameter is a URL for uniformity. It needs to be a
URL of type file otherwise an InvalidArgumentException
will be thrown. An implementation for this method is
provided as a static method at
CorpusImpl.populate(Corpus, URL, FileFilter, String, boolean)
.encoding
- the encoding to be used for reading the documentsrecurseDirectories
- should the directory be parsed
recursively? If true, all the files from the
provided directory and all its children directories (on as
many levels as necessary) will be picked if accepted by
the filter otherwise the children directories will be
ignored.mimeType
- the mime type to be used when loading documents. If
null, then the mime type will be automatically determined.IOException
ResourceInstantiationException
public long populate(URL singleConcatenatedFile, String documentRootElement, String encoding, int numberOfFilesToExtract, String documentNamePrefix, String mimeType, boolean includeRootElement) throws IOException, ResourceInstantiationException
populate
in interface SimpleCorpus
singleConcatenatedFile
- the single concatenated file.documentRootElement
- content between the start and end of
this element is considered for documents.encoding
- the encoding of the trec file.numberOfFilesToExtract
- indicates the number of files to
extract from the trecweb file.documentNamePrefix
- the prefix to use for document names when
creating documents from the single concatenated file.
mimeType
- the mime type which determines how the document is handledIOException
ResourceInstantiationException
public void removeCorpusListener(CorpusListener l)
Corpus
removeCorpusListener
in interface Corpus
l
- the listener to be removed.public void addCorpusListener(CorpusListener l)
Corpus
CorpusListener
with this corpus.addCorpusListener
in interface Corpus
l
- the listener to be added.protected void fireDocumentAdded(CorpusEvent e)
protected void fireDocumentRemoved(CorpusEvent e)
public void resourceLoaded(CreoleEvent e)
CreoleListener
Resource
has been loaded into the systemresourceLoaded
in interface CreoleListener
public void resourceRenamed(Resource resource, String oldName, String newName)
CreoleListener
resourceRenamed
in interface CreoleListener
public void resourceUnloaded(CreoleEvent e)
CreoleListener
Resource
has been removed from the systemresourceUnloaded
in interface CreoleListener
public void datastoreOpened(CreoleEvent e)
CreoleListener
DataStore
has been openeddatastoreOpened
in interface CreoleListener
public void datastoreCreated(CreoleEvent e)
CreoleListener
DataStore
has been createddatastoreCreated
in interface CreoleListener
public void datastoreClosed(CreoleEvent e)
CreoleListener
DataStore
has been closeddatastoreClosed
in interface CreoleListener
public void resourceAdopted(DatastoreEvent evt)
resourceAdopted
in interface DatastoreListener
public void resourceDeleted(DatastoreEvent evt)
resourceDeleted
in interface DatastoreListener
public void resourceWritten(DatastoreEvent evt)
resourceWritten
in interface DatastoreListener
public int size()
public boolean isEmpty()
public boolean contains(Object o)
public String toString()
toString
in class AbstractResource
public Object[] toArray()
public <T> T[] toArray(T[] a)
public boolean add(Document o)
public boolean remove(Object o)
public int findDocument(Document doc)
public boolean containsAll(Collection<?> c)
containsAll
in interface Collection<Document>
containsAll
in interface List<Document>
public boolean addAll(Collection<? extends Document> c)
public boolean addAll(int index, Collection<? extends Document> c)
public boolean removeAll(Collection<?> c)
public boolean retainAll(Collection<?> c)
public void clear()
public boolean equals(Object o)
public int hashCode()
public int lastIndexOf(Object o)
lastIndexOf
in interface List<Document>
public ListIterator<Document> listIterator()
listIterator
in interface List<Document>
public ListIterator<Document> listIterator(int index)
listIterator
in interface List<Document>
public List<Document> subList(int fromIndex, int toIndex)
public void setDataStore(DataStore dataStore) throws PersistenceException
AbstractLanguageResource
setDataStore
in interface LanguageResource
setDataStore
in class AbstractLanguageResource
PersistenceException
public void setTransientSource(Object source)
public Object getTransientSource()
public Resource init() throws ResourceInstantiationException
AbstractResource
init
in interface Resource
init
in class AbstractResource
ResourceInstantiationException
public void setIndexDefinition(IndexDefinition definition)
IndexedCorpus
setIndexDefinition
in interface IndexedCorpus
definition
- of index for this corpuspublic IndexDefinition getIndexDefinition()
getIndexDefinition
in interface IndexedCorpus
public IndexManager getIndexManager()
getIndexManager
in interface IndexedCorpus
public IndexStatistics getIndexStatistics()
getIndexStatistics
in interface IndexedCorpus
public Resource duplicate(Factory.DuplicationContext ctx) throws ResourceInstantiationException
duplicate
in interface CustomDuplication
ctx
- the current duplication context
.
If an implementation of this method needs to duplicate any
other resources as part of the custom duplication process
it should pass this context back to the two-argument form of
Factory.duplicate
rather than using the single-argument form.ResourceInstantiationException
Copyright © 2024 GATE. All rights reserved.