@CreoleResource(name="GATE Textual Document Format", isPrivate=true, autoinstances=@AutoInstance(hidden=true)) public class TextualDocumentFormat extends DocumentFormat
element2StringMap, magic2mimeTypeMap, markupElementsMap, mimeString2ClassHandlerMap, mimeString2mimeTypeMap, suffixes2mimeTypeMap
dataStore, lrPersistentId
name
Constructor and Description |
---|
TextualDocumentFormat()
Default construction
|
Modifier and Type | Method and Description |
---|---|
void |
annotateParagraphs(Document aDoc,
int startOffset,
int endOffset,
String annotSetName)
This method annotates paragraphs in a GATE document.
|
DataStore |
getDataStore()
Get the data store that this LR lives in.
|
protected static boolean |
hasContentButNoValidUrl(Document doc)
This is a test to see whether the GATE document has a valid URL or
valid content.
|
Resource |
init()
Initialise this resource, and return it.
|
protected void |
setNewLineProperty(Document doc)
Check the new line sequence and set document property.
|
void |
unpackMarkup(Document doc)
Unpack the markup in the document.
|
void |
unpackMarkup(Document doc,
RepositioningInfo repInfo,
RepositioningInfo ampCodingInfo) |
addStatusListener, areEqual, decideBetweenThreeMimeTypes, decideBetweenTwoMimeTypes, fireStatusChanged, getDocumentFormat, getDocumentFormat, getDocumentFormat, getDocumentFormat, getElement2StringMap, getFeatures, getMarkupElementsMap, getMimeType, getMimeTypeForString, getShouldCollectRepositioning, getSupportedFileSuffixes, getSupportedMimeTypes, guessTypeUsingMagicNumbers, removeStatusListener, runMagicNumbers, setElement2StringMap, setFeatures, setMarkupElementsMap, setMimeType, setShouldCollectRepositioning, supportsRepositioning, unpackMarkup, willReadFromUrl
cleanup, getLRPersistenceId, getParent, isModified, setDataStore, setLRPersistenceId, setParent, sync
checkParameterValues, flushBeanInfoCache, forgetBeanInfo, getBeanInfo, getInitParameterValues, getInitParameterValues, getName, getParameterValue, getParameterValue, getParameterValues, removeResourceListeners, setName, setParameterValue, setParameterValue, setParameterValues, setParameterValues, setResourceListeners, toString
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
getParameterValue, setParameterValue, setParameterValues
getName, setName
public Resource init() throws ResourceInstantiationException
init
in interface Resource
init
in class AbstractResource
ResourceInstantiationException
public void unpackMarkup(Document doc) throws DocumentFormatException
unpackMarkup
in class DocumentFormat
DocumentFormatException
public void unpackMarkup(Document doc, RepositioningInfo repInfo, RepositioningInfo ampCodingInfo) throws DocumentFormatException
unpackMarkup
in class DocumentFormat
DocumentFormatException
protected static boolean hasContentButNoValidUrl(Document doc) throws DocumentFormatException
doc
-
DocumentFormatException
protected void setNewLineProperty(Document doc)
public void annotateParagraphs(Document aDoc, int startOffset, int endOffset, String annotSetName) throws DocumentFormatException
aDoc
- is the GATE document on which the paragraph detection would
be performed. If it is null or its content is null then the method would
simply return doing nothing.
startOffset
- is the index from the document content from which the
paragraph detection will start.
endOffset
- is the offset where the detection will end.
annotSetName
- is the name of the set in which paragraph annotations
would be created. The annotation type created will be "paragraph".
DocumentFormatException
public DataStore getDataStore()
AbstractLanguageResource
getDataStore
in interface LanguageResource
getDataStore
in class AbstractLanguageResource
Copyright © 2024 GATE. All rights reserved.