@CreoleResource(name="GATE Corpus", comment="GATE transient corpus.", interfaceName="gate.Corpus", icon="corpus-trans", helpURL="http://gate.ac.uk/userguide/sec:developer:loadlr") public class CorpusImpl extends AbstractLanguageResource implements Corpus, CreoleListener, CustomDuplication
Modifier and Type | Class and Description |
---|---|
protected class |
CorpusImpl.VerboseList
A proxy list that stores the actual data in an internal list and
forwards all operations to that one but it also fires the
appropriate corpus events when necessary.
|
Modifier and Type | Field and Description |
---|---|
protected List<Document> |
documentsList |
protected List<Document> |
supportList
The underlying list that holds the documents in this corpus.
|
dataStore, lrPersistentId
name
features
CORPUS_DOCLIST_PARAMETER_NAME, CORPUS_NAME_PARAMETER_NAME
Constructor and Description |
---|
CorpusImpl() |
Modifier and Type | Method and Description |
---|---|
boolean |
add(Document o) |
void |
add(int index,
Document element) |
boolean |
addAll(Collection<? extends Document> c) |
boolean |
addAll(int index,
Collection<? extends Document> c) |
void |
addCorpusListener(CorpusListener l)
Registers a new
CorpusListener with this corpus. |
void |
cleanup()
Construction
|
void |
clear() |
protected void |
clearDocList() |
boolean |
contains(Object o) |
boolean |
containsAll(Collection<?> c) |
void |
datastoreClosed(CreoleEvent e)
Called when a
DataStore has been closed |
void |
datastoreCreated(CreoleEvent e)
Called when a
DataStore has been created |
void |
datastoreOpened(CreoleEvent e)
Called when a
DataStore has been opened |
Resource |
duplicate(Factory.DuplicationContext ctx)
Custom duplication for a corpus - duplicate this corpus in the
usual way, then duplicate the documents in this corpus and add them
to the duplicate.
|
boolean |
equals(Object o) |
protected void |
fireDocumentAdded(CorpusEvent e) |
protected void |
fireDocumentRemoved(CorpusEvent e) |
Document |
get(int index) |
String |
getDocumentName(int index)
Gets the name of a document in this corpus.
|
List<String> |
getDocumentNames()
Gets the names of the documents in this corpus.
|
List<Document> |
getDocumentsList() |
int |
hashCode() |
int |
indexOf(Object o) |
Resource |
init()
Initialise this resource, and return it.
|
boolean |
isDocumentLoaded(int index)
This method returns true when the document is already loaded in
memory
|
boolean |
isEmpty() |
Iterator<Document> |
iterator() |
int |
lastIndexOf(Object o) |
ListIterator<Document> |
listIterator() |
ListIterator<Document> |
listIterator(int index) |
static void |
populate(Corpus corpus,
URL directory,
FileFilter filter,
String encoding,
boolean recurseDirectories)
Fills the provided corpus with documents created on the fly from
selected files in a directory.
|
static void |
populate(Corpus corpus,
URL directory,
FileFilter filter,
String encoding,
String mimeType,
boolean recurseDirectories)
Fills the provided corpus with documents created on the fly from
selected files in a directory.
|
static long |
populate(Corpus corpus,
URL singleConcatenatedFile,
String documentRootElement,
String encoding,
int numberOfDocumentsToExtract,
String documentNamePrefix,
String mimeType,
boolean includeRootElement)
Fills the provided corpus with documents extracted from the
provided trec file.
|
void |
populate(URL directory,
FileFilter filter,
String encoding,
boolean recurseDirectories)
Fills this corpus with documents created from files in a directory.
|
void |
populate(URL directory,
FileFilter filter,
String encoding,
String mimeType,
boolean recurseDirectories)
Fills this corpus with documents created from files in a directory.
|
long |
populate(URL singleConcatenatedFile,
String documentRootElement,
String encoding,
int numberOfFilesToExtract,
String documentNamePrefix,
String mimeType,
boolean includeRootElement)
Fills this corpus with documents extracted from the
provided single concatenated file.
|
Document |
remove(int index) |
boolean |
remove(Object o) |
boolean |
removeAll(Collection<?> c) |
void |
removeCorpusListener(CorpusListener l)
Removes one of the listeners registered with this corpus.
|
void |
resourceLoaded(CreoleEvent e)
Called when a new
Resource has been loaded into the system |
void |
resourceRenamed(Resource resource,
String oldName,
String newName)
Called when the creole register has renamed a resource.
|
void |
resourceUnloaded(CreoleEvent e)
Called when a
Resource has been removed from the system |
boolean |
retainAll(Collection<?> c) |
Document |
set(int index,
Document element) |
void |
setDocumentsList(List<Document> documentsList) |
int |
size() |
List<Document> |
subList(int fromIndex,
int toIndex) |
Object[] |
toArray() |
<T> T[] |
toArray(T[] a) |
void |
unloadDocument(Document doc)
This method does not make sense for transient corpora, so it does
nothing.
|
getDataStore, getLRPersistenceId, getParent, isModified, setDataStore, setLRPersistenceId, setParent, sync
checkParameterValues, flushBeanInfoCache, forgetBeanInfo, getBeanInfo, getInitParameterValues, getInitParameterValues, getName, getParameterValue, getParameterValue, getParameterValues, removeResourceListeners, setName, setParameterValue, setParameterValue, setParameterValues, setParameterValues, setResourceListeners, toString
getFeatures, setFeatures
clone, finalize, getClass, notify, notifyAll, wait, wait, wait
getDataStore, getLRPersistenceId, getParent, isModified, setDataStore, setLRPersistenceId, setParent, sync
getParameterValue, setParameterValue, setParameterValues
getFeatures, setFeatures
getName, setName
replaceAll, sort, spliterator
parallelStream, removeIf, stream
protected List<Document> supportList
public List<String> getDocumentNames()
getDocumentNames
in interface SimpleCorpus
List
of Strings representing the names of the
documents in this corpus.public String getDocumentName(int index)
getDocumentName
in interface SimpleCorpus
index
- the index of the documentpublic void unloadDocument(Document doc)
unloadDocument
in interface Corpus
doc
- Document to be unloaded from memory.public boolean isDocumentLoaded(int index)
isDocumentLoaded
in interface Corpus
protected void clearDocList()
public int size()
public boolean isEmpty()
public boolean contains(Object o)
public Object[] toArray()
public <T> T[] toArray(T[] a)
public boolean add(Document o)
public boolean remove(Object o)
public boolean containsAll(Collection<?> c)
containsAll
in interface Collection<Document>
containsAll
in interface List<Document>
public boolean addAll(Collection<? extends Document> c)
public boolean addAll(int index, Collection<? extends Document> c)
public boolean removeAll(Collection<?> c)
public boolean retainAll(Collection<?> c)
public void clear()
public boolean equals(Object o)
public int hashCode()
public int lastIndexOf(Object o)
lastIndexOf
in interface List<Document>
public ListIterator<Document> listIterator()
listIterator
in interface List<Document>
public ListIterator<Document> listIterator(int index)
listIterator
in interface List<Document>
public void cleanup()
cleanup
in interface Resource
cleanup
in class AbstractLanguageResource
public Resource init()
init
in interface Resource
init
in class AbstractResource
public static void populate(Corpus corpus, URL directory, FileFilter filter, String encoding, boolean recurseDirectories) throws IOException
FileFilter
to select
which files will be used and which will be ignored. A simple file
filter based on extensions is provided in the Gate distribution (
ExtensionFileFilter
).corpus
- the corpus to be populateddirectory
- the directory from which the files will be picked.
This parameter is a URL for uniformity. It needs to be a
URL of type file; otherwise an InvalidArgumentException
will be thrown.
filter
- the file filter used to select files from the target
directory. If the filter is null all the files
will be accepted.encoding
- the encoding to be used for reading the documentsrecurseDirectories
- should the directory be parsed
recursively? If true, all the files from the
provided directory and all its child directories (on as
many levels as necessary) will be picked if accepted by
the filter; otherwise the child directories will be
ignored.
IOException
- if a file doesn't existpublic static void populate(Corpus corpus, URL directory, FileFilter filter, String encoding, String mimeType, boolean recurseDirectories) throws IOException
FileFilter
to select
which files will be used and which will be ignored. A simple file
filter based on extensions is provided in the Gate distribution (
ExtensionFileFilter
).corpus
- the corpus to be populateddirectory
- the directory from which the files will be picked.
This parameter is a URL for uniformity. It needs to be a
URL of type file; otherwise an InvalidArgumentException
will be thrown.
filter
- the file filter used to select files from the target
directory. If the filter is null all the files
will be accepted.encoding
- the encoding to be used for reading the documentsrecurseDirectories
- should the directory be parsed
recursively? If true, all the files from the
provided directory and all its child directories (on as
many levels as necessary) will be picked if accepted by
the filter; otherwise the child directories will be
ignored.
IOException
- if a file doesn't existpublic void populate(URL directory, FileFilter filter, String encoding, boolean recurseDirectories) throws IOException, ResourceInstantiationException
populate
in interface SimpleCorpus
filter
- the file filter used to select files from the target
directory. If the filter is null all the files
will be accepted.directory
- the directory from which the files will be picked.
This parameter is a URL for uniformity. It needs to be a
URL of type file; otherwise an InvalidArgumentException
will be thrown. An implementation of this method is
provided as the static method
populate(Corpus, URL, FileFilter, String, boolean).
encoding
- the encoding to be used for reading the documentsrecurseDirectories
- should the directory be parsed
recursively? If true, all the files from the
provided directory and all its child directories (on as
many levels as necessary) will be picked if accepted by
the filter; otherwise the child directories will be
ignored.
IOException
ResourceInstantiationException
public void populate(URL directory, FileFilter filter, String encoding, String mimeType, boolean recurseDirectories) throws IOException, ResourceInstantiationException
populate
in interface SimpleCorpus
filter
- the file filter used to select files from the target
directory. If the filter is null all the files
will be accepted.directory
- the directory from which the files will be picked.
This parameter is a URL for uniformity. It needs to be a
URL of type file; otherwise an InvalidArgumentException
will be thrown. An implementation of this method is
provided as the static method
populate(Corpus, URL, FileFilter, String, boolean).
encoding
- the encoding to be used for reading the documentsmimeType
- the mime type to be used when loading documents. If
null, then the mime type will be detected automatically.recurseDirectories
- should the directory be parsed
recursively? If true, all the files from the
provided directory and all its child directories (on as
many levels as necessary) will be picked if accepted by
the filter; otherwise the child directories will be
ignored.
IOException
ResourceInstantiationException
public static long populate(Corpus corpus, URL singleConcatenatedFile, String documentRootElement, String encoding, int numberOfDocumentsToExtract, String documentNamePrefix, String mimeType, boolean includeRootElement) throws IOException
corpus
- the corpus to be populated.singleConcatenatedFile
- the trec file.documentRootElement
- text between this element (start and
end) is considered for creating a new document.encoding
- the encoding of the trec file.numberOfDocumentsToExtract
- extracts the specified number of
documents from the trecweb file; -1 to indicate all files.mimeType
- the mime type which determines how the document is handledIOException
public long populate(URL singleConcatenatedFile, String documentRootElement, String encoding, int numberOfFilesToExtract, String documentNamePrefix, String mimeType, boolean includeRootElement) throws IOException, ResourceInstantiationException
populate
in interface SimpleCorpus
singleConcatenatedFile
- the single concatenated file to load.documentRootElement
- content between the start and end of
this element is considered for documents.encoding
- the encoding of the trec file.numberOfFilesToExtract
- indicates the number of files to
extract from the trecweb file.documentNamePrefix
- the prefix to use for document names when
creating documents from the single concatenated file.
mimeType
- the mime type which determines how the document is handledIOException
ResourceInstantiationException
public void removeCorpusListener(CorpusListener l)
Corpus
removeCorpusListener
in interface Corpus
l
- the listener to be removed.public void addCorpusListener(CorpusListener l)
Corpus
CorpusListener
with this corpus.addCorpusListener
in interface Corpus
l
- the listener to be added.public Resource duplicate(Factory.DuplicationContext ctx) throws ResourceInstantiationException
duplicate
in interface CustomDuplication
ctx
- the current duplication context
.
If an implementation of this method needs to duplicate any
other resources as part of the custom duplication process
it should pass this context back to the two-argument form of
Factory.duplicate
rather than using the single-argument form.ResourceInstantiationException
protected void fireDocumentAdded(CorpusEvent e)
protected void fireDocumentRemoved(CorpusEvent e)
@Optional @CreoleParameter(collectionElementType=Document.class, comment="A list of GATE documents") public void setDocumentsList(List<Document> documentsList)
public void resourceLoaded(CreoleEvent e)
CreoleListener
Resource
has been loaded into the systemresourceLoaded
in interface CreoleListener
public void resourceUnloaded(CreoleEvent e)
CreoleListener
Resource
has been removed from the systemresourceUnloaded
in interface CreoleListener
public void resourceRenamed(Resource resource, String oldName, String newName)
CreoleListener
resourceRenamed
in interface CreoleListener
public void datastoreOpened(CreoleEvent e)
CreoleListener
DataStore
has been openeddatastoreOpened
in interface CreoleListener
public void datastoreCreated(CreoleEvent e)
CreoleListener
DataStore
has been createddatastoreCreated
in interface CreoleListener
public void datastoreClosed(CreoleEvent e)
CreoleListener
DataStore
has been closeddatastoreClosed
in interface CreoleListener
Copyright © 2024 GATE. All rights reserved.