Module gatenlp.serialization.default_htmlannviewer
Module that implements the various ways of how to save and load documents and change logs.
Expand source code
"""
Module that implements the various ways of how to save and load documents and change logs.
"""
import os
from random import choice
from string import ascii_uppercase
from gatenlp.document import Document
from gatenlp.gatenlpconfig import gatenlpconfig
import json as jsonlib
JS_JQUERY_URL = "https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"
JS_GATENLP_URL = "https://unpkg.com/gatenlp-ann-viewer@1.0.17/gatenlp-ann-viewer.js"
JS_JQUERY = f"<script src=\"{JS_JQUERY_URL}\"></script>"
JS_GATENLP = f"<script src=\"{JS_GATENLP_URL}\"></script>"
HTML_TEMPLATE_FILE_NAME = "gatenlp-ann-viewer.html"
JS_GATENLP_FILE_NAME = "gatenlp-ann-viewer-merged.js"
html_ann_viewer_serializer_js_loaded = False
SEP = "║"
def init_javascript():
"""
Initialize the notebook/html javascript.
"""
from IPython.display import display_html # pylint: disable=C0415
display_html(HtmlAnnViewerSerializer.javascript(), raw=True)
def show_colab(
doc,
htmlid=None,
display=False,
annspec=None,
preselect=None,
palette=None,
cols4types=None,
doc_style=None,
row1_style=None,
row2_style=None
):
"""
Show htmldocumentviewe in a colab notebook.
"""
from IPython.display import display_html, Javascript # pylint: disable=C0415
from IPython.display import display as i_display # pylint: disable=C0415
i_display(Javascript(url=JS_JQUERY_URL))
i_display(Javascript(url=JS_GATENLP_URL))
html = doc.save_mem(
fmt="html-ann-viewer",
notebook=True,
add_js=False,
offline=True,
htmlid=htmlid,
annspec=annspec,
preselect=preselect,
palette=palette,
cols4types=cols4types,
stretch_height=False,
doc_style=doc_style,
row1_style=row1_style,
row2_style=row2_style,
)
if display:
display_html(html, raw=True)
return None
else:
return html
def show_notebook(
doc,
htmlid=None,
display=False,
annspec=None,
preselect=None,
palette=None,
cols4types=None,
doc_style=None,
row1_style=None,
row2_style=None
):
"""
Show htmldocumentviewer in a jupyter notebook.
"""
if not gatenlpconfig.notebook_js_initialized:
init_javascript()
gatenlpconfig.notebook_js_initialized = True
html = doc.save_mem(
fmt="html-ann-viewer",
notebook=True,
add_js=False,
offline=True,
htmlid=htmlid,
annspec=annspec,
preselect=preselect,
palette=palette,
cols4types=cols4types,
stretch_height=False,
doc_style=doc_style,
row1_style=row1_style,
row2_style=row2_style,
)
if display:
from IPython.display import display_html # pylint: disable=C0415
display_html(html, raw=True)
return None
else:
return html
class HtmlAnnViewerSerializer:
"""
Serialization class for generating HTML/Javascript to view a document in an HTML page or in a Jupyter or
Colab notebook.
"""
@staticmethod
def javascript():
"""
Return the Javascript needed for the HTML Annotation viewer.
Returns: Javascript string.
"""
jsloc = os.path.join(
os.path.dirname(__file__), "_htmlviewer", JS_GATENLP_FILE_NAME
)
if not os.path.exists(jsloc):
raise Exception(
"Could not find JavsScript file, {} does not exist".format(jsloc)
)
with open(jsloc, "rt", encoding="utf-8") as infp:
js = infp.read()
js = """<script type="text/javascript">""" + js + "</script>"
return js
@staticmethod
def save(
_clazz,
inst,
to_ext=None,
to_mem=None,
notebook=False,
offline=False,
add_js=True,
htmlid=None,
stretch_height=False,
annspec=None,
preselect=None,
palette=None,
cols4types=None,
doc_style=None,
row1_style=None,
row2_style=None,
**kwargs,
):
"""Convert a document to HTML for visualizing it.
Args:
_clazz: the class of the object to save
inst: the instance/object to save
to_ext: the destination where to save to unless to_mem is given
to_mem: if true, ignores to_ext and returns the representation
notebook: if True only create a div which can be injected into a notebook or other HTML, otherwise
generate a full HTML document
offline: if true, include all the Javascript needed in the generated HTML , otherwise load library
from the internet.
add_js: if true (default), add the necessary Javascript either directly or by loading a library from
the internet. If false, assume that the Javascript is already there (only makes sense with
notebook=True).
htmlid: the id to use for HTML ids so it is possible to have several independent viewers in the
same HTML page and to style the output from a separate notebook cell
stretch_height: if False, rows 1 and 2 of the viewer will not have the height set, but only
min and max height (default min is 10em for row1 and 7em for row2, max is the double of those).
If True, no max haight is set and instead the height is set to a percentage (default is
67vh for row 1 and 30vh for row 2). The values used can be changed via gateconfig or the
complete style for the rows can be set directly via row1_style and row2_style.
annspec: if None, include all annotation sets and types, otherwise this should be a list of either
set names, or tuples, where the first entry is a set name and the second entry is either a type
name or list of type names to include.
preselect: if not None, the set and type names to pre-select (show). This should have the same format
as the annspec parameter.
palette: if not None a list of colour codes (strings) usable in Javascript which will be used instead
of the default palette.
cols4types: if not None a dictionary mapping tuples (setname, typename) to a color. For the given
setname and typename combinations, the colours from the palette (default or specified) will be
overrriden.
doc_style: if not None, any additional styling for the document text box, if None, use whatever
is defined as gatenlpconfig.doc_html_repr_doc_style or do not use.
row1_style: the style to use for the first row of the document viewer which shows the document text and
annotation set and type panes. The default is gatenlpconfig.doc_html_repr_row1style_nostretch or
gatenlpconfig.doc_html_repr_row1style_nostretch depending on the stretch_height parameter.
row2_style: the style to use for the second row of the document viewer which shows the document or
annotation features. The default is gatenlpconfig.doc_html_repr_row2style_nostretch or
gatenlpconfig.doc_html_repr_row2style_nostretch depending on the stretch_height parameter.
kwargs: swallow any other kwargs.
Returns: if to_mem is True, returns the representation, otherwise None.
"""
if not isinstance(inst, Document):
raise Exception("Not a document!")
parms = dict(presel_set=[], presel_list=[])
doccopy = inst.deepcopy(annspec=annspec)
doccopy.to_offset_type("j")
json = doccopy.save_mem(fmt="json", **kwargs)
htmlloc = os.path.join(
os.path.dirname(__file__), "_htmlviewer", HTML_TEMPLATE_FILE_NAME
)
if not os.path.exists(htmlloc):
raise Exception(
"Could not find HTML template, {} does not exist".format(htmlloc)
)
with open(htmlloc, "rt", encoding="utf-8") as infp:
html = infp.read()
txtcolor = gatenlpconfig.doc_html_repr_txtcolor
if preselect is not None:
# create a list of set/type lists and a set of set of setSEPtype for parms
presel_set = set()
presel_list = []
for el in preselect:
if isinstance(el, str):
for anntype in doccopy.annset(el).type_names:
settype = el + SEP + anntype
if settype not in presel_set:
presel_set.add(settype)
presel_list.append([el, anntype])
elif isinstance(el, (list, tuple)) and len(el) > 1:
setname = el[0]
anntypes = el[1]
if isinstance(anntypes, str):
anntypes = [anntypes]
for anntype in anntypes:
settype = setname + SEP + anntype
if settype not in presel_set:
presel_set.add(settype)
presel_list.append([setname, anntype])
parms["presel_set"] = list(presel_set)
parms["presel_list"] = presel_list
if notebook:
str_start = "<!--STARTDIV-->"
str_end = "<!--ENDDIV-->"
idx1 = html.find(str_start) + len(str_start)
idx2 = html.find(str_end)
if htmlid:
rndpref = str(htmlid)
else:
rndpref = "".join(choice(ascii_uppercase) for _i in range(10))
html = html[idx1:idx2]
html = f"""<div><style>#{rndpref}-wrapper {{ color: {txtcolor} !important; }}</style>
<div id="{rndpref}-wrapper">
{html}
</div></div>"""
# replace the prefix with a random one
html = html.replace("GATENLPID", rndpref)
if palette is not None:
parms["palette"] = palette
if cols4types:
newdict = {}
for k, v in cols4types.items():
if not isinstance(k, tuple) or not len(k) == 2 or not isinstance(v, str):
raise Exception("cols4types: must be a dictionary mapping (setname,typename) to color string")
newdict[k[0]+SEP+k[1]] = v
parms["cols4types"] = newdict
else:
parms["cols4types"] = {}
if offline:
# global html_ann_viewer_serializer_js_loaded
# if not html_ann_viewer_serializer_js_loaded:
if add_js:
jsloc = os.path.join(
os.path.dirname(__file__), "_htmlviewer", JS_GATENLP_FILE_NAME
)
if not os.path.exists(jsloc):
raise Exception(
"Could not find JavsScript file, {} does not exist".format(
jsloc
)
)
with open(jsloc, "rt", encoding="utf-8") as infp:
js = infp.read()
js = """<script type="text/javascript">""" + js + "</script>"
# html_ann_viewer_serializer_js_loaded = True
else:
js = ""
else:
js = JS_JQUERY + JS_GATENLP
if stretch_height:
if row1_style is None:
row1_style = gatenlpconfig.doc_html_repr_row1style_stretch
if row2_style is None:
row2_style = gatenlpconfig.doc_html_repr_row2style_stretch
else:
if row1_style is None:
row1_style = gatenlpconfig.doc_html_repr_row1style_nostretch
if row2_style is None:
row2_style = gatenlpconfig.doc_html_repr_row2style_nostretch
html = html.replace("$$JAVASCRIPT$$", js, 1).replace("$$JSONDATA$$", json, 1)
html = html.replace("$$JSONPARMS$$", jsonlib.dumps(parms), 1)
html = html.replace("$$ROW1STYLE$$", row1_style, 1).replace(
"$$ROW2STYLE$$", row2_style, 1
)
if doc_style is None:
doc_style = gatenlpconfig.doc_html_repr_doc_style
if doc_style is None:
doc_style = ""
html = html.replace("$$DOCTEXTSTYLE$$", doc_style, 1)
if to_mem:
return html
else:
with open(to_ext, "wt", encoding="utf-8") as outfp:
outfp.write(html)
Functions
def init_javascript()
-
Initialize the notebook/html javascript.
Expand source code
def init_javascript(): """ Initialize the notebook/html javascript. """ from IPython.display import display_html # pylint: disable=C0415 display_html(HtmlAnnViewerSerializer.javascript(), raw=True)
def show_colab(doc, htmlid=None, display=False, annspec=None, preselect=None, palette=None, cols4types=None, doc_style=None, row1_style=None, row2_style=None)
-
Show htmldocumentviewe in a colab notebook.
Expand source code
def show_colab( doc, htmlid=None, display=False, annspec=None, preselect=None, palette=None, cols4types=None, doc_style=None, row1_style=None, row2_style=None ): """ Show htmldocumentviewe in a colab notebook. """ from IPython.display import display_html, Javascript # pylint: disable=C0415 from IPython.display import display as i_display # pylint: disable=C0415 i_display(Javascript(url=JS_JQUERY_URL)) i_display(Javascript(url=JS_GATENLP_URL)) html = doc.save_mem( fmt="html-ann-viewer", notebook=True, add_js=False, offline=True, htmlid=htmlid, annspec=annspec, preselect=preselect, palette=palette, cols4types=cols4types, stretch_height=False, doc_style=doc_style, row1_style=row1_style, row2_style=row2_style, ) if display: display_html(html, raw=True) return None else: return html
def show_notebook(doc, htmlid=None, display=False, annspec=None, preselect=None, palette=None, cols4types=None, doc_style=None, row1_style=None, row2_style=None)
-
Show htmldocumentviewer in a jupyter notebook.
Expand source code
def show_notebook( doc, htmlid=None, display=False, annspec=None, preselect=None, palette=None, cols4types=None, doc_style=None, row1_style=None, row2_style=None ): """ Show htmldocumentviewer in a jupyter notebook. """ if not gatenlpconfig.notebook_js_initialized: init_javascript() gatenlpconfig.notebook_js_initialized = True html = doc.save_mem( fmt="html-ann-viewer", notebook=True, add_js=False, offline=True, htmlid=htmlid, annspec=annspec, preselect=preselect, palette=palette, cols4types=cols4types, stretch_height=False, doc_style=doc_style, row1_style=row1_style, row2_style=row2_style, ) if display: from IPython.display import display_html # pylint: disable=C0415 display_html(html, raw=True) return None else: return html
Classes
class HtmlAnnViewerSerializer
-
Serialization class for generating HTML/Javascript to view a document in an HTML page or in a Jupyter or Colab notebook.
Expand source code
class HtmlAnnViewerSerializer: """ Serialization class for generating HTML/Javascript to view a document in an HTML page or in a Jupyter or Colab notebook. """ @staticmethod def javascript(): """ Return the Javascript needed for the HTML Annotation viewer. Returns: Javascript string. """ jsloc = os.path.join( os.path.dirname(__file__), "_htmlviewer", JS_GATENLP_FILE_NAME ) if not os.path.exists(jsloc): raise Exception( "Could not find JavsScript file, {} does not exist".format(jsloc) ) with open(jsloc, "rt", encoding="utf-8") as infp: js = infp.read() js = """<script type="text/javascript">""" + js + "</script>" return js @staticmethod def save( _clazz, inst, to_ext=None, to_mem=None, notebook=False, offline=False, add_js=True, htmlid=None, stretch_height=False, annspec=None, preselect=None, palette=None, cols4types=None, doc_style=None, row1_style=None, row2_style=None, **kwargs, ): """Convert a document to HTML for visualizing it. Args: _clazz: the class of the object to save inst: the instance/object to save to_ext: the destination where to save to unless to_mem is given to_mem: if true, ignores to_ext and returns the representation notebook: if True only create a div which can be injected into a notebook or other HTML, otherwise generate a full HTML document offline: if true, include all the Javascript needed in the generated HTML , otherwise load library from the internet. add_js: if true (default), add the necessary Javascript either directly or by loading a library from the internet. If false, assume that the Javascript is already there (only makes sense with notebook=True). htmlid: the id to use for HTML ids so it is possible to have several independent viewers in the same HTML page and to style the output from a separate notebook cell stretch_height: if False, rows 1 and 2 of the viewer will not have the height set, but only min and max height (default min is 10em for row1 and 7em for row2, max is the double of those). If True, no max haight is set and instead the height is set to a percentage (default is 67vh for row 1 and 30vh for row 2). The values used can be changed via gateconfig or the complete style for the rows can be set directly via row1_style and row2_style. annspec: if None, include all annotation sets and types, otherwise this should be a list of either set names, or tuples, where the first entry is a set name and the second entry is either a type name or list of type names to include. preselect: if not None, the set and type names to pre-select (show). This should have the same format as the annspec parameter. palette: if not None a list of colour codes (strings) usable in Javascript which will be used instead of the default palette. cols4types: if not None a dictionary mapping tuples (setname, typename) to a color. For the given setname and typename combinations, the colours from the palette (default or specified) will be overrriden. doc_style: if not None, any additional styling for the document text box, if None, use whatever is defined as gatenlpconfig.doc_html_repr_doc_style or do not use. row1_style: the style to use for the first row of the document viewer which shows the document text and annotation set and type panes. The default is gatenlpconfig.doc_html_repr_row1style_nostretch or gatenlpconfig.doc_html_repr_row1style_nostretch depending on the stretch_height parameter. row2_style: the style to use for the second row of the document viewer which shows the document or annotation features. The default is gatenlpconfig.doc_html_repr_row2style_nostretch or gatenlpconfig.doc_html_repr_row2style_nostretch depending on the stretch_height parameter. kwargs: swallow any other kwargs. Returns: if to_mem is True, returns the representation, otherwise None. """ if not isinstance(inst, Document): raise Exception("Not a document!") parms = dict(presel_set=[], presel_list=[]) doccopy = inst.deepcopy(annspec=annspec) doccopy.to_offset_type("j") json = doccopy.save_mem(fmt="json", **kwargs) htmlloc = os.path.join( os.path.dirname(__file__), "_htmlviewer", HTML_TEMPLATE_FILE_NAME ) if not os.path.exists(htmlloc): raise Exception( "Could not find HTML template, {} does not exist".format(htmlloc) ) with open(htmlloc, "rt", encoding="utf-8") as infp: html = infp.read() txtcolor = gatenlpconfig.doc_html_repr_txtcolor if preselect is not None: # create a list of set/type lists and a set of set of setSEPtype for parms presel_set = set() presel_list = [] for el in preselect: if isinstance(el, str): for anntype in doccopy.annset(el).type_names: settype = el + SEP + anntype if settype not in presel_set: presel_set.add(settype) presel_list.append([el, anntype]) elif isinstance(el, (list, tuple)) and len(el) > 1: setname = el[0] anntypes = el[1] if isinstance(anntypes, str): anntypes = [anntypes] for anntype in anntypes: settype = setname + SEP + anntype if settype not in presel_set: presel_set.add(settype) presel_list.append([setname, anntype]) parms["presel_set"] = list(presel_set) parms["presel_list"] = presel_list if notebook: str_start = "<!--STARTDIV-->" str_end = "<!--ENDDIV-->" idx1 = html.find(str_start) + len(str_start) idx2 = html.find(str_end) if htmlid: rndpref = str(htmlid) else: rndpref = "".join(choice(ascii_uppercase) for _i in range(10)) html = html[idx1:idx2] html = f"""<div><style>#{rndpref}-wrapper {{ color: {txtcolor} !important; }}</style> <div id="{rndpref}-wrapper"> {html} </div></div>""" # replace the prefix with a random one html = html.replace("GATENLPID", rndpref) if palette is not None: parms["palette"] = palette if cols4types: newdict = {} for k, v in cols4types.items(): if not isinstance(k, tuple) or not len(k) == 2 or not isinstance(v, str): raise Exception("cols4types: must be a dictionary mapping (setname,typename) to color string") newdict[k[0]+SEP+k[1]] = v parms["cols4types"] = newdict else: parms["cols4types"] = {} if offline: # global html_ann_viewer_serializer_js_loaded # if not html_ann_viewer_serializer_js_loaded: if add_js: jsloc = os.path.join( os.path.dirname(__file__), "_htmlviewer", JS_GATENLP_FILE_NAME ) if not os.path.exists(jsloc): raise Exception( "Could not find JavsScript file, {} does not exist".format( jsloc ) ) with open(jsloc, "rt", encoding="utf-8") as infp: js = infp.read() js = """<script type="text/javascript">""" + js + "</script>" # html_ann_viewer_serializer_js_loaded = True else: js = "" else: js = JS_JQUERY + JS_GATENLP if stretch_height: if row1_style is None: row1_style = gatenlpconfig.doc_html_repr_row1style_stretch if row2_style is None: row2_style = gatenlpconfig.doc_html_repr_row2style_stretch else: if row1_style is None: row1_style = gatenlpconfig.doc_html_repr_row1style_nostretch if row2_style is None: row2_style = gatenlpconfig.doc_html_repr_row2style_nostretch html = html.replace("$$JAVASCRIPT$$", js, 1).replace("$$JSONDATA$$", json, 1) html = html.replace("$$JSONPARMS$$", jsonlib.dumps(parms), 1) html = html.replace("$$ROW1STYLE$$", row1_style, 1).replace( "$$ROW2STYLE$$", row2_style, 1 ) if doc_style is None: doc_style = gatenlpconfig.doc_html_repr_doc_style if doc_style is None: doc_style = "" html = html.replace("$$DOCTEXTSTYLE$$", doc_style, 1) if to_mem: return html else: with open(to_ext, "wt", encoding="utf-8") as outfp: outfp.write(html)
Static methods
def javascript()
-
Return the Javascript needed for the HTML Annotation viewer.
Returns: Javascript string.
Expand source code
@staticmethod def javascript(): """ Return the Javascript needed for the HTML Annotation viewer. Returns: Javascript string. """ jsloc = os.path.join( os.path.dirname(__file__), "_htmlviewer", JS_GATENLP_FILE_NAME ) if not os.path.exists(jsloc): raise Exception( "Could not find JavsScript file, {} does not exist".format(jsloc) ) with open(jsloc, "rt", encoding="utf-8") as infp: js = infp.read() js = """<script type="text/javascript">""" + js + "</script>" return js
def save(_clazz, inst, to_ext=None, to_mem=None, notebook=False, offline=False, add_js=True, htmlid=None, stretch_height=False, annspec=None, preselect=None, palette=None, cols4types=None, doc_style=None, row1_style=None, row2_style=None, **kwargs)
-
Convert a document to HTML for visualizing it.
Args
_clazz
- the class of the object to save
inst
- the instance/object to save
to_ext
- the destination where to save to unless to_mem is given
to_mem
- if true, ignores to_ext and returns the representation
notebook
- if True only create a div which can be injected into a notebook or other HTML, otherwise generate a full HTML document
offline
- if true, include all the Javascript needed in the generated HTML , otherwise load library from the internet.
add_js
- if true (default), add the necessary Javascript either directly or by loading a library from the internet. If false, assume that the Javascript is already there (only makes sense with notebook=True).
htmlid
- the id to use for HTML ids so it is possible to have several independent viewers in the same HTML page and to style the output from a separate notebook cell
stretch_height
- if False, rows 1 and 2 of the viewer will not have the height set, but only min and max height (default min is 10em for row1 and 7em for row2, max is the double of those). If True, no max haight is set and instead the height is set to a percentage (default is 67vh for row 1 and 30vh for row 2). The values used can be changed via gateconfig or the complete style for the rows can be set directly via row1_style and row2_style.
annspec
- if None, include all annotation sets and types, otherwise this should be a list of either set names, or tuples, where the first entry is a set name and the second entry is either a type name or list of type names to include.
preselect
- if not None, the set and type names to pre-select (show). This should have the same format as the annspec parameter.
palette
- if not None a list of colour codes (strings) usable in Javascript which will be used instead of the default palette.
cols4types
- if not None a dictionary mapping tuples (setname, typename) to a color. For the given setname and typename combinations, the colours from the palette (default or specified) will be overrriden.
doc_style
- if not None, any additional styling for the document text box, if None, use whatever is defined as gatenlpconfig.doc_html_repr_doc_style or do not use.
row1_style
- the style to use for the first row of the document viewer which shows the document text and annotation set and type panes. The default is gatenlpconfig.doc_html_repr_row1style_nostretch or gatenlpconfig.doc_html_repr_row1style_nostretch depending on the stretch_height parameter.
row2_style
- the style to use for the second row of the document viewer which shows the document or annotation features. The default is gatenlpconfig.doc_html_repr_row2style_nostretch or gatenlpconfig.doc_html_repr_row2style_nostretch depending on the stretch_height parameter.
kwargs
- swallow any other kwargs.
Returns: if to_mem is True, returns the representation, otherwise None.
Expand source code
@staticmethod def save( _clazz, inst, to_ext=None, to_mem=None, notebook=False, offline=False, add_js=True, htmlid=None, stretch_height=False, annspec=None, preselect=None, palette=None, cols4types=None, doc_style=None, row1_style=None, row2_style=None, **kwargs, ): """Convert a document to HTML for visualizing it. Args: _clazz: the class of the object to save inst: the instance/object to save to_ext: the destination where to save to unless to_mem is given to_mem: if true, ignores to_ext and returns the representation notebook: if True only create a div which can be injected into a notebook or other HTML, otherwise generate a full HTML document offline: if true, include all the Javascript needed in the generated HTML , otherwise load library from the internet. add_js: if true (default), add the necessary Javascript either directly or by loading a library from the internet. If false, assume that the Javascript is already there (only makes sense with notebook=True). htmlid: the id to use for HTML ids so it is possible to have several independent viewers in the same HTML page and to style the output from a separate notebook cell stretch_height: if False, rows 1 and 2 of the viewer will not have the height set, but only min and max height (default min is 10em for row1 and 7em for row2, max is the double of those). If True, no max haight is set and instead the height is set to a percentage (default is 67vh for row 1 and 30vh for row 2). The values used can be changed via gateconfig or the complete style for the rows can be set directly via row1_style and row2_style. annspec: if None, include all annotation sets and types, otherwise this should be a list of either set names, or tuples, where the first entry is a set name and the second entry is either a type name or list of type names to include. preselect: if not None, the set and type names to pre-select (show). This should have the same format as the annspec parameter. palette: if not None a list of colour codes (strings) usable in Javascript which will be used instead of the default palette. cols4types: if not None a dictionary mapping tuples (setname, typename) to a color. For the given setname and typename combinations, the colours from the palette (default or specified) will be overrriden. doc_style: if not None, any additional styling for the document text box, if None, use whatever is defined as gatenlpconfig.doc_html_repr_doc_style or do not use. row1_style: the style to use for the first row of the document viewer which shows the document text and annotation set and type panes. The default is gatenlpconfig.doc_html_repr_row1style_nostretch or gatenlpconfig.doc_html_repr_row1style_nostretch depending on the stretch_height parameter. row2_style: the style to use for the second row of the document viewer which shows the document or annotation features. The default is gatenlpconfig.doc_html_repr_row2style_nostretch or gatenlpconfig.doc_html_repr_row2style_nostretch depending on the stretch_height parameter. kwargs: swallow any other kwargs. Returns: if to_mem is True, returns the representation, otherwise None. """ if not isinstance(inst, Document): raise Exception("Not a document!") parms = dict(presel_set=[], presel_list=[]) doccopy = inst.deepcopy(annspec=annspec) doccopy.to_offset_type("j") json = doccopy.save_mem(fmt="json", **kwargs) htmlloc = os.path.join( os.path.dirname(__file__), "_htmlviewer", HTML_TEMPLATE_FILE_NAME ) if not os.path.exists(htmlloc): raise Exception( "Could not find HTML template, {} does not exist".format(htmlloc) ) with open(htmlloc, "rt", encoding="utf-8") as infp: html = infp.read() txtcolor = gatenlpconfig.doc_html_repr_txtcolor if preselect is not None: # create a list of set/type lists and a set of set of setSEPtype for parms presel_set = set() presel_list = [] for el in preselect: if isinstance(el, str): for anntype in doccopy.annset(el).type_names: settype = el + SEP + anntype if settype not in presel_set: presel_set.add(settype) presel_list.append([el, anntype]) elif isinstance(el, (list, tuple)) and len(el) > 1: setname = el[0] anntypes = el[1] if isinstance(anntypes, str): anntypes = [anntypes] for anntype in anntypes: settype = setname + SEP + anntype if settype not in presel_set: presel_set.add(settype) presel_list.append([setname, anntype]) parms["presel_set"] = list(presel_set) parms["presel_list"] = presel_list if notebook: str_start = "<!--STARTDIV-->" str_end = "<!--ENDDIV-->" idx1 = html.find(str_start) + len(str_start) idx2 = html.find(str_end) if htmlid: rndpref = str(htmlid) else: rndpref = "".join(choice(ascii_uppercase) for _i in range(10)) html = html[idx1:idx2] html = f"""<div><style>#{rndpref}-wrapper {{ color: {txtcolor} !important; }}</style> <div id="{rndpref}-wrapper"> {html} </div></div>""" # replace the prefix with a random one html = html.replace("GATENLPID", rndpref) if palette is not None: parms["palette"] = palette if cols4types: newdict = {} for k, v in cols4types.items(): if not isinstance(k, tuple) or not len(k) == 2 or not isinstance(v, str): raise Exception("cols4types: must be a dictionary mapping (setname,typename) to color string") newdict[k[0]+SEP+k[1]] = v parms["cols4types"] = newdict else: parms["cols4types"] = {} if offline: # global html_ann_viewer_serializer_js_loaded # if not html_ann_viewer_serializer_js_loaded: if add_js: jsloc = os.path.join( os.path.dirname(__file__), "_htmlviewer", JS_GATENLP_FILE_NAME ) if not os.path.exists(jsloc): raise Exception( "Could not find JavsScript file, {} does not exist".format( jsloc ) ) with open(jsloc, "rt", encoding="utf-8") as infp: js = infp.read() js = """<script type="text/javascript">""" + js + "</script>" # html_ann_viewer_serializer_js_loaded = True else: js = "" else: js = JS_JQUERY + JS_GATENLP if stretch_height: if row1_style is None: row1_style = gatenlpconfig.doc_html_repr_row1style_stretch if row2_style is None: row2_style = gatenlpconfig.doc_html_repr_row2style_stretch else: if row1_style is None: row1_style = gatenlpconfig.doc_html_repr_row1style_nostretch if row2_style is None: row2_style = gatenlpconfig.doc_html_repr_row2style_nostretch html = html.replace("$$JAVASCRIPT$$", js, 1).replace("$$JSONDATA$$", json, 1) html = html.replace("$$JSONPARMS$$", jsonlib.dumps(parms), 1) html = html.replace("$$ROW1STYLE$$", row1_style, 1).replace( "$$ROW2STYLE$$", row2_style, 1 ) if doc_style is None: doc_style = gatenlpconfig.doc_html_repr_doc_style if doc_style is None: doc_style = "" html = html.replace("$$DOCTEXTSTYLE$$", doc_style, 1) if to_mem: return html else: with open(to_ext, "wt", encoding="utf-8") as outfp: outfp.write(html)