Module gatenlp.serialization.default_plaintext

Module that implements the various ways to save and load documents and change logs.

Expand source code
"""
Module that implements the various ways to save and load documents and change logs.
"""
from gatenlp.document import Document
from gatenlp.urlfileutils import is_url, get_str_from_url, get_bytes_from_url
from gzip import open as gopen, compress, decompress


class PlainTextSerializer:
    """Serializer that stores just the text of a document, optionally gzip-compressed."""

    @staticmethod
    def save(
        clazz,
        inst,
        to_ext=None,
        to_mem=None,
        offset_type=None,
        offset_mapper=None,
        encoding="UTF-8",
        gzip=False,
        **kwargs,
    ):
        """
        Save the text of `inst` to a file or return it in memory.

        Args:
          clazz: not used by this serializer
          inst: object whose `text` attribute is saved; a text of None is
            saved as the empty string
          to_ext: file to write to, used when `to_mem` is not truthy
            (Default value = None)
          to_mem: if truthy, return the serialized text instead of writing
            a file (Default value = None)
          offset_type: not used by this serializer (Default value = None)
          offset_mapper: not used by this serializer (Default value = None)
          encoding: encoding of the written file or of the compressed
            in-memory bytes (Default value = "UTF-8")
          gzip: if truthy, gzip-compress the output (Default value = False)
          **kwargs: ignored

        Returns:
          If `to_mem` is truthy: the text as a str, or the gzip-compressed
          encoded text as bytes when `gzip` is truthy. Otherwise None.

        """
        txt = inst.text
        if txt is None:
            txt = ""
        if to_mem:
            if gzip:
                # The compressed bytes have to be handed back to the caller;
                # without the return the in-memory gzip result was lost.
                return compress(txt.encode(encoding))
            return txt
        # gzip.open in text mode mirrors the builtin open, so one code path
        # serves both the compressed and the plain file.
        opener = gopen if gzip else open
        with opener(to_ext, "wt", encoding=encoding) as outfp:
            outfp.write(txt)
        return None

    @staticmethod
    def save_gzip(clazz, inst, **kwargs):
        """
        Save the text of `inst` gzip-compressed, see `save`.

        Args:
          clazz: passed on to `save`
          inst: passed on to `save`
          **kwargs: passed on to `save`; must not contain `gzip`

        Returns:
          Whatever `save` returns with gzip=True: the compressed bytes when
          `to_mem` is truthy, otherwise None.

        """
        return PlainTextSerializer.save(clazz, inst, gzip=True, **kwargs)

    @staticmethod
    def load(
        clazz,
        from_ext=None,
        from_mem=None,
        offset_mapper=None,
        encoding="UTF-8",
        gzip=False,
        **kwargs,
    ):
        """
        Create a document from plain text read from a URL, a file or memory.

        Args:
          clazz: not used by this serializer
          from_ext: URL or file to read from (Default value = None)
          from_mem: the text as str, or gzip-compressed bytes when `gzip`
            is truthy; replaced by the fetched content if `from_ext` is a
            URL (Default value = None)
          offset_mapper: not used by this serializer (Default value = None)
          encoding: encoding used to decode the data (Default value = "UTF-8")
          gzip: if truthy, the data is gzip-compressed (Default value = False)
          **kwargs: ignored

        Returns:
          A Document created from the loaded text.

        """
        # NOTE(review): is_url is called even when from_ext is None, so it is
        # presumably expected to cope with None - confirm in urlfileutils.
        isurl, extstr = is_url(from_ext)
        if from_ext is not None and isurl:
            # URL content is fetched first and then handled like in-memory data.
            if gzip:
                from_mem = get_bytes_from_url(extstr)
            else:
                from_mem = get_str_from_url(extstr, encoding=encoding)
        if from_mem is not None:
            txt = decompress(from_mem).decode(encoding) if gzip else from_mem
        else:
            opener = gopen if gzip else open
            with opener(extstr, "rt", encoding=encoding) as infp:
                txt = infp.read()
        return Document(txt)

    @staticmethod
    def load_gzip(clazz, **kwargs):
        """
        Load a document from gzip-compressed plain text, see `load`.

        Args:
          clazz: passed on to `load`
          **kwargs: passed on to `load`; must not contain `gzip`

        Returns:
          The Document returned by `load` with gzip=True.

        """
        return PlainTextSerializer.load(clazz, gzip=True, **kwargs)

Classes

class PlainTextSerializer
Expand source code
class PlainTextSerializer:
    """Serializer that stores just the text of a document, optionally gzip-compressed."""

    @staticmethod
    def save(
        clazz,
        inst,
        to_ext=None,
        to_mem=None,
        offset_type=None,
        offset_mapper=None,
        encoding="UTF-8",
        gzip=False,
        **kwargs,
    ):
        """
        Save the text of `inst` to a file or return it in memory.

        Args:
          clazz: not used by this serializer
          inst: object whose `text` attribute is saved; a text of None is
            saved as the empty string
          to_ext: file to write to, used when `to_mem` is not truthy
            (Default value = None)
          to_mem: if truthy, return the serialized text instead of writing
            a file (Default value = None)
          offset_type: not used by this serializer (Default value = None)
          offset_mapper: not used by this serializer (Default value = None)
          encoding: encoding of the written file or of the compressed
            in-memory bytes (Default value = "UTF-8")
          gzip: if truthy, gzip-compress the output (Default value = False)
          **kwargs: ignored

        Returns:
          If `to_mem` is truthy: the text as a str, or the gzip-compressed
          encoded text as bytes when `gzip` is truthy. Otherwise None.

        """
        txt = inst.text
        if txt is None:
            txt = ""
        if to_mem:
            if gzip:
                # The compressed bytes have to be handed back to the caller;
                # without the return the in-memory gzip result was lost.
                return compress(txt.encode(encoding))
            return txt
        # gzip.open in text mode mirrors the builtin open, so one code path
        # serves both the compressed and the plain file.
        opener = gopen if gzip else open
        with opener(to_ext, "wt", encoding=encoding) as outfp:
            outfp.write(txt)
        return None

    @staticmethod
    def save_gzip(clazz, inst, **kwargs):
        """
        Save the text of `inst` gzip-compressed, see `save`.

        Args:
          clazz: passed on to `save`
          inst: passed on to `save`
          **kwargs: passed on to `save`; must not contain `gzip`

        Returns:
          Whatever `save` returns with gzip=True: the compressed bytes when
          `to_mem` is truthy, otherwise None.

        """
        return PlainTextSerializer.save(clazz, inst, gzip=True, **kwargs)

    @staticmethod
    def load(
        clazz,
        from_ext=None,
        from_mem=None,
        offset_mapper=None,
        encoding="UTF-8",
        gzip=False,
        **kwargs,
    ):
        """
        Create a document from plain text read from a URL, a file or memory.

        Args:
          clazz: not used by this serializer
          from_ext: URL or file to read from (Default value = None)
          from_mem: the text as str, or gzip-compressed bytes when `gzip`
            is truthy; replaced by the fetched content if `from_ext` is a
            URL (Default value = None)
          offset_mapper: not used by this serializer (Default value = None)
          encoding: encoding used to decode the data (Default value = "UTF-8")
          gzip: if truthy, the data is gzip-compressed (Default value = False)
          **kwargs: ignored

        Returns:
          A Document created from the loaded text.

        """
        # NOTE(review): is_url is called even when from_ext is None, so it is
        # presumably expected to cope with None - confirm in urlfileutils.
        isurl, extstr = is_url(from_ext)
        if from_ext is not None and isurl:
            # URL content is fetched first and then handled like in-memory data.
            if gzip:
                from_mem = get_bytes_from_url(extstr)
            else:
                from_mem = get_str_from_url(extstr, encoding=encoding)
        if from_mem is not None:
            txt = decompress(from_mem).decode(encoding) if gzip else from_mem
        else:
            opener = gopen if gzip else open
            with opener(extstr, "rt", encoding=encoding) as infp:
                txt = infp.read()
        return Document(txt)

    @staticmethod
    def load_gzip(clazz, **kwargs):
        """
        Load a document from gzip-compressed plain text, see `load`.

        Args:
          clazz: passed on to `load`
          **kwargs: passed on to `load`; must not contain `gzip`

        Returns:
          The Document returned by `load` with gzip=True.

        """
        return PlainTextSerializer.load(clazz, gzip=True, **kwargs)

Static methods

def load(clazz, from_ext=None, from_mem=None, offset_mapper=None, encoding='UTF-8', gzip=False, **kwargs)

Args

clazz:
from_ext
(Default value = None)
from_mem
(Default value = None)
offset_mapper
(Default value = None)
encoding
(Default value = "UTF-8")
gzip
(Default value = False)

**kwargs:

Returns

The Document created from the loaded text.

Expand source code
@staticmethod
def load(
    clazz,
    from_ext=None,
    from_mem=None,
    offset_mapper=None,
    encoding="UTF-8",
    gzip=False,
    **kwargs,
):
    """
    Create a document from plain text read from a URL, a file or memory.

    Args:
      clazz: not used by this function
      from_ext: URL or file to read from (Default value = None)
      from_mem: the text as str, or gzip-compressed bytes when `gzip` is
        truthy; replaced by the fetched content if `from_ext` is a URL
        (Default value = None)
      offset_mapper: not used by this function (Default value = None)
      encoding: encoding used to decode the data (Default value = "UTF-8")
      gzip: if truthy, the data is gzip-compressed (Default value = False)
      **kwargs: ignored

    Returns:
      A Document created from the loaded text.

    """
    # NOTE(review): is_url is called even when from_ext is None, so it is
    # presumably expected to cope with None - confirm in urlfileutils.
    url_flag, location = is_url(from_ext)
    if from_ext is not None and url_flag:
        # URL content is fetched first and then treated like in-memory data.
        fetched = (
            get_bytes_from_url(location)
            if gzip
            else get_str_from_url(location, encoding=encoding)
        )
        from_mem = fetched

    if from_mem is None:
        # Nothing in memory: read the (possibly compressed) file in text mode.
        reader = gopen if gzip else open
        with reader(location, "rt", encoding=encoding) as source:
            content = source.read()
        return Document(content)

    content = decompress(from_mem).decode(encoding) if gzip else from_mem
    return Document(content)
def load_gzip(clazz, **kwargs)

Args

clazz:
**kwargs:

Returns

The result of PlainTextSerializer.load called with gzip=True.

Expand source code
@staticmethod
def load_gzip(clazz, **kwargs):
    """
    Load a document from gzip-compressed plain text.

    Args:
      clazz: passed on to `PlainTextSerializer.load`
      **kwargs: passed on to `PlainTextSerializer.load`; must not contain `gzip`

    Returns:
      Whatever `PlainTextSerializer.load` returns when called with gzip=True.

    """
    loaded = PlainTextSerializer.load(clazz, gzip=True, **kwargs)
    return loaded
def save(clazz, inst, to_ext=None, to_mem=None, offset_type=None, offset_mapper=None, encoding='UTF-8', gzip=False, **kwargs)

Args

clazz:
inst:
to_ext
(Default value = None)
to_mem
(Default value = None)
offset_type
(Default value = None)
offset_mapper
(Default value = None)
encoding
(Default value = "UTF-8")
gzip
(Default value = False)

**kwargs:

Returns:

Expand source code
@staticmethod
def save(
    clazz,
    inst,
    to_ext=None,
    to_mem=None,
    offset_type=None,
    offset_mapper=None,
    encoding="UTF-8",
    gzip=False,
    **kwargs,
):
    """
    Save the text of `inst` to a file or return it in memory.

    Args:
      clazz: not used by this function
      inst: object whose `text` attribute is saved; a text of None is saved
        as the empty string
      to_ext: file to write to, used when `to_mem` is not truthy
        (Default value = None)
      to_mem: if truthy, return the serialized text instead of writing a
        file (Default value = None)
      offset_type: not used by this function (Default value = None)
      offset_mapper: not used by this function (Default value = None)
      encoding: encoding of the written file or of the compressed in-memory
        bytes (Default value = "UTF-8")
      gzip: if truthy, gzip-compress the output (Default value = False)
      **kwargs: ignored

    Returns:
      If `to_mem` is truthy: the text as a str, or the gzip-compressed
      encoded text as bytes when `gzip` is truthy. Otherwise None.

    """
    txt = inst.text
    if txt is None:
        txt = ""
    if to_mem:
        if gzip:
            # The compressed bytes have to be handed back to the caller;
            # without the return the in-memory gzip result was lost.
            return compress(txt.encode(encoding))
        return txt
    # gzip.open in text mode mirrors the builtin open, so one code path
    # serves both the compressed and the plain file.
    opener = gopen if gzip else open
    with opener(to_ext, "wt", encoding=encoding) as outfp:
        outfp.write(txt)
    return None
def save_gzip(clazz, inst, **kwargs)

Args

clazz:
inst:
**kwargs:

Returns:

Expand source code
@staticmethod
def save_gzip(clazz, inst, **kwargs):
    """
    Save the text of `inst` gzip-compressed.

    Args:
      clazz: passed on to `PlainTextSerializer.save`
      inst: passed on to `PlainTextSerializer.save`
      **kwargs: passed on to `PlainTextSerializer.save`; must not contain `gzip`

    Returns:
      Whatever `PlainTextSerializer.save` returns when called with gzip=True.

    """
    # Propagate the result so that an in-memory save (to_mem) is not
    # silently dropped by this wrapper.
    return PlainTextSerializer.save(clazz, inst, gzip=True, **kwargs)