Module gatenlp.serialization.default_json
Module that implements the json serialization class.
Expand source code
"""
Module that implements the json serialization class.
"""
from gatenlp.urlfileutils import is_url, get_str_from_url, get_bytes_from_url
from gzip import open as gopen, compress, decompress
import json
# import orjson as usejson
# import json as usejson
# import rapidjson as usejson
# import ujson as usejson
# import hyperjson as usejson
JSON_WRITE = "wt"
JSON_READ = "rt"
# # for replacing json by orjson
# class json:
# @staticmethod
# def load(fp):
# data = fp.read()
# return usejson.loads(data)
# @staticmethod
# def loads(data):
# return usejson.loads(data)
# @staticmethod
# def dump(obj, fp):
# buf = usejson.dumps(obj)
# fp.write(buf)
# @staticmethod
# def dumps(obj):
# return usejson.dumps(obj)
# # for replacing json with one of the other implementations
# class json:
# @staticmethod
# def load(fp):
# return usejson.load(fp)
# @staticmethod
# def loads(data):
# return usejson.loads(data)
# @staticmethod
# def dump(obj, fp):
# buf = usejson.dump(obj, fp)
# @staticmethod
# def dumps(obj):
# return usejson.dumps(obj)
# TODO: for ALL save options, allow to filter the annotations that get saved!
# TODO: then use this show only limited set of annotations in the viewer
# TODO: create Document.display(....) to show document in various ways in the current
# environment, e.g. Jupyter notebook, select anns, configure colour palette, size etc.
# TODO: when loading from a URL, allow for deciding on the format based on the mime type!
# So if we do not have the format, we should get the header for the file, check the mime type and see
# if we have a loder registered for that and then let the loader do the rest of the work. This may
# need loaders to be able to use an already open stream.
class JsonSerializer:
"""
This class performs the saving and load of Documents and ChangeLog instances to and from the
BDOC JSON format files, optionally with gzip compression.
"""
@staticmethod
def save(
clazz,
inst,
to_ext=None,
to_mem=None,
offset_type=None,
offset_mapper=None,
gzip=False,
annspec=None,
**kwargs,
):
"""
Args:
clazz: the class of the object that gets saved
inst: the object to get saved
to_ext: where to save to, this should be a file path, only one of to_ext and to_mem should be specified
to_mem: if True, return a String serialization
offset_type: the offset type to use for saving, if None (default) use "p" (Python)
offset_mapper: the offset mapper to use, only needed if the type needs to get converted
gzip: if True, the JSON gets gzip compressed
annspec: which annotation sets and types to include, list of set names or (setanme, types) tuples
**kwargs:
"""
d = inst.to_dict(offset_type=offset_type, offset_mapper=offset_mapper, annspec=annspec, **kwargs)
if to_mem:
if gzip:
compress(json.dumps(d).encode("UTF-8"))
else:
return json.dumps(d)
else:
if gzip:
with gopen(to_ext, JSON_WRITE) as outfp:
json.dump(d, outfp)
else:
with open(to_ext, JSON_WRITE) as outfp:
json.dump(d, outfp)
@staticmethod
def save_gzip(clazz, inst, **kwargs):
"""
Invokes the save method with gzip=True
"""
JsonSerializer.save(clazz, inst, gzip=True, **kwargs)
@staticmethod
def load(
clazz, from_ext=None, from_mem=None, offset_mapper=None, gzip=False, **kwargs
):
"""
Args:
clazz:
from_ext: (Default value = None)
from_mem: (Default value = None)
offset_mapper: (Default value = None)
gzip: (Default value = False)
**kwargs:
Returns:
"""
# print("RUNNING load with from_ext=", from_ext, " from_mem=", from_mem)
if from_ext is not None and from_mem is not None:
raise Exception("Exactly one of from_ext and from_mem must be specified ")
if from_ext is None and from_mem is None:
raise Exception("Exactly one of from_ext and from_mem must be specified ")
isurl, extstr = is_url(from_ext)
if from_ext is not None:
if isurl:
# print("DEBUG: we got a URL")
if gzip:
from_mem = get_bytes_from_url(extstr)
else:
from_mem = get_str_from_url(extstr, encoding="utf-8")
else:
# print("DEBUG: not a URL !!!")
pass
if from_mem is not None:
if gzip:
d = json.loads(decompress(from_mem).decode("UTF-8"))
else:
d = json.loads(from_mem)
doc = clazz.from_dict(d, offset_mapper=offset_mapper, **kwargs)
else: # from_ext must have been not None and a path
if gzip:
with gopen(extstr, JSON_READ) as infp:
d = json.load(infp)
else:
with open(extstr, JSON_READ) as infp:
d = json.load(infp)
doc = clazz.from_dict(d, offset_mapper=offset_mapper, **kwargs)
return doc
@staticmethod
def load_gzip(clazz, **kwargs):
"""
Args:
clazz:
**kwargs:
Returns:
"""
return JsonSerializer.load(clazz, gzip=True, **kwargs)
Classes
class JsonSerializer
-
This class performs the saving and load of Documents and ChangeLog instances to and from the BDOC JSON format files, optionally with gzip compression.
Expand source code
class JsonSerializer: """ This class performs the saving and load of Documents and ChangeLog instances to and from the BDOC JSON format files, optionally with gzip compression. """ @staticmethod def save( clazz, inst, to_ext=None, to_mem=None, offset_type=None, offset_mapper=None, gzip=False, annspec=None, **kwargs, ): """ Args: clazz: the class of the object that gets saved inst: the object to get saved to_ext: where to save to, this should be a file path, only one of to_ext and to_mem should be specified to_mem: if True, return a String serialization offset_type: the offset type to use for saving, if None (default) use "p" (Python) offset_mapper: the offset mapper to use, only needed if the type needs to get converted gzip: if True, the JSON gets gzip compressed annspec: which annotation sets and types to include, list of set names or (setanme, types) tuples **kwargs: """ d = inst.to_dict(offset_type=offset_type, offset_mapper=offset_mapper, annspec=annspec, **kwargs) if to_mem: if gzip: compress(json.dumps(d).encode("UTF-8")) else: return json.dumps(d) else: if gzip: with gopen(to_ext, JSON_WRITE) as outfp: json.dump(d, outfp) else: with open(to_ext, JSON_WRITE) as outfp: json.dump(d, outfp) @staticmethod def save_gzip(clazz, inst, **kwargs): """ Invokes the save method with gzip=True """ JsonSerializer.save(clazz, inst, gzip=True, **kwargs) @staticmethod def load( clazz, from_ext=None, from_mem=None, offset_mapper=None, gzip=False, **kwargs ): """ Args: clazz: from_ext: (Default value = None) from_mem: (Default value = None) offset_mapper: (Default value = None) gzip: (Default value = False) **kwargs: Returns: """ # print("RUNNING load with from_ext=", from_ext, " from_mem=", from_mem) if from_ext is not None and from_mem is not None: raise Exception("Exactly one of from_ext and from_mem must be specified ") if from_ext is None and from_mem is None: raise Exception("Exactly one of from_ext and from_mem must be specified ") isurl, extstr = is_url(from_ext) if from_ext is not None: if isurl: # print("DEBUG: we got a URL") if gzip: from_mem = get_bytes_from_url(extstr) else: from_mem = get_str_from_url(extstr, encoding="utf-8") else: # print("DEBUG: not a URL !!!") pass if from_mem is not None: if gzip: d = json.loads(decompress(from_mem).decode("UTF-8")) else: d = json.loads(from_mem) doc = clazz.from_dict(d, offset_mapper=offset_mapper, **kwargs) else: # from_ext must have been not None and a path if gzip: with gopen(extstr, JSON_READ) as infp: d = json.load(infp) else: with open(extstr, JSON_READ) as infp: d = json.load(infp) doc = clazz.from_dict(d, offset_mapper=offset_mapper, **kwargs) return doc @staticmethod def load_gzip(clazz, **kwargs): """ Args: clazz: **kwargs: Returns: """ return JsonSerializer.load(clazz, gzip=True, **kwargs)
Static methods
def load(clazz, from_ext=None, from_mem=None, offset_mapper=None, gzip=False, **kwargs)
-
Args
- clazz:
from_ext
- (Default value = None)
from_mem
- (Default value = None)
offset_mapper
- (Default value = None)
gzip
- (Default value = False)
**kwargs: Returns:
Expand source code
@staticmethod def load( clazz, from_ext=None, from_mem=None, offset_mapper=None, gzip=False, **kwargs ): """ Args: clazz: from_ext: (Default value = None) from_mem: (Default value = None) offset_mapper: (Default value = None) gzip: (Default value = False) **kwargs: Returns: """ # print("RUNNING load with from_ext=", from_ext, " from_mem=", from_mem) if from_ext is not None and from_mem is not None: raise Exception("Exactly one of from_ext and from_mem must be specified ") if from_ext is None and from_mem is None: raise Exception("Exactly one of from_ext and from_mem must be specified ") isurl, extstr = is_url(from_ext) if from_ext is not None: if isurl: # print("DEBUG: we got a URL") if gzip: from_mem = get_bytes_from_url(extstr) else: from_mem = get_str_from_url(extstr, encoding="utf-8") else: # print("DEBUG: not a URL !!!") pass if from_mem is not None: if gzip: d = json.loads(decompress(from_mem).decode("UTF-8")) else: d = json.loads(from_mem) doc = clazz.from_dict(d, offset_mapper=offset_mapper, **kwargs) else: # from_ext must have been not None and a path if gzip: with gopen(extstr, JSON_READ) as infp: d = json.load(infp) else: with open(extstr, JSON_READ) as infp: d = json.load(infp) doc = clazz.from_dict(d, offset_mapper=offset_mapper, **kwargs) return doc
def load_gzip(clazz, **kwargs)
-
Args
clazz: **kwargs: Returns:
Expand source code
@staticmethod def load_gzip(clazz, **kwargs): """ Args: clazz: **kwargs: Returns: """ return JsonSerializer.load(clazz, gzip=True, **kwargs)
def save(clazz, inst, to_ext=None, to_mem=None, offset_type=None, offset_mapper=None, gzip=False, annspec=None, **kwargs)
-
Args
clazz
- the class of the object that gets saved
inst
- the object to get saved
to_ext
- where to save to, this should be a file path, only one of to_ext and to_mem should be specified
to_mem
- if True, return a String serialization
offset_type
- the offset type to use for saving, if None (default) use "p" (Python)
offset_mapper
- the offset mapper to use, only needed if the type needs to get converted
gzip
- if True, the JSON gets gzip compressed
annspec
- which annotation sets and types to include, list of set names or (setanme, types) tuples
**kwargs:
Expand source code
@staticmethod def save( clazz, inst, to_ext=None, to_mem=None, offset_type=None, offset_mapper=None, gzip=False, annspec=None, **kwargs, ): """ Args: clazz: the class of the object that gets saved inst: the object to get saved to_ext: where to save to, this should be a file path, only one of to_ext and to_mem should be specified to_mem: if True, return a String serialization offset_type: the offset type to use for saving, if None (default) use "p" (Python) offset_mapper: the offset mapper to use, only needed if the type needs to get converted gzip: if True, the JSON gets gzip compressed annspec: which annotation sets and types to include, list of set names or (setanme, types) tuples **kwargs: """ d = inst.to_dict(offset_type=offset_type, offset_mapper=offset_mapper, annspec=annspec, **kwargs) if to_mem: if gzip: compress(json.dumps(d).encode("UTF-8")) else: return json.dumps(d) else: if gzip: with gopen(to_ext, JSON_WRITE) as outfp: json.dump(d, outfp) else: with open(to_ext, JSON_WRITE) as outfp: json.dump(d, outfp)
def save_gzip(clazz, inst, **kwargs)
-
Invokes the save method with gzip=True
Expand source code
@staticmethod def save_gzip(clazz, inst, **kwargs): """ Invokes the save method with gzip=True """ JsonSerializer.save(clazz, inst, gzip=True, **kwargs)