Module gatenlp.features

Module that implements the class Features for representing features.

Expand source code
"""
Module that implements the class Features for representing features.
"""
# Implementation note: Features should behave much like a dict. However, inheriting from dict
# is problematic, because dict has an odd way to implement interdependent methods, e.g. ne and eq
# are implemented separately, so the inheriting class would need to implement them separately too.
# Similar for clear and __delitem__ and other method pairs.
#
# Possible alternate approaches:
# * implement everything ourselves
# * inherit from collections.abc.MutableMapping and implement delitem, getitem, setitem, iter, len and repr
# * inherit from collections.UserDict and implement delitem, setitem and repr
#   This one IS NOT a dict but WRAPS around a dict which is accessible as self.data
# Since Features is meant to be pretty close to how a dict works and originally was implemented by
# wrapping an actual dict, the collections.UserDict approach seems to be more adequate.

from collections import UserDict
import copy as lib_copy


class Features(UserDict):
    """
    A dict-like class for storing features, which are mappings from string feature names to
    arbitrary feature values. If the Features instance is a field in another object where
    changes are getting logged in a change log, it should pass on the logger, a method for
    logging feature changes. Any copy of an instance of Features will not receive the logger,
    in order to make sure that logging happens, the instance stored in the original owning
    object must be used.
    """

    def __init__(self, *args, _change_logger=None, _deepcopy=False, **kwargs):
        """
        Initialize a Features object.

        Args:
            args: zero or one positional arguments which can be another Features object, a mapping,
                or an iterable of key value pairs.
            _change_logger: a function for logging any changes to the feature map. This should be
                a method implemented in the owning object. It is called with the command name as
                the only positional argument and, depending on the command, the keyword arguments
                `feature` and `value`. NOTE: this is not related to the usual logging
                from the logging package, but for making use of tracking changes in a ChangeLog!
            _deepcopy: if False (default) the dictionary wrapped by this Features object is a shallow
                copy of any original dictionary object. If True, all the values are deep-copied.
            **kwargs: any number of additional keyword arguments which are used to set features.

        Raises:
            TypeError: if more than one positional argument is given.
        """
        if len(args) > 1:
            # Previously surplus positional arguments were dropped without any notice.
            raise TypeError(
                f"Features expected at most 1 positional argument, got {len(args)}"
            )
        # The logger has to exist before UserDict.__init__ runs: the initial features are
        # stored through __setitem__, which consults self._logger.
        self._logger = _change_logger
        kws = lib_copy.deepcopy(kwargs) if _deepcopy else kwargs
        if not args:
            super().__init__(**kws)
            return
        posarg = args[0]
        # Unwrap another Features instance so that a deep copy only touches the plain
        # feature dict and never the logger of the original instance.
        data = posarg.data if isinstance(posarg, Features) else posarg
        if _deepcopy:
            data = lib_copy.deepcopy(data)
        super().__init__(data, **kws)

    def __delitem__(self, featurename):
        """
        Remove the feature with the given feature name. This raises a key error if featurename is
        not in the Features. To silently remove a key, if it exists, use `pop(fname, None)`

        Args:
            featurename: name of the feature to remove

        Raises:
            KeyError: if there is no feature with that name; nothing is logged in that case.
        """
        # Check first, so that a removal which does not happen is not recorded by the logger.
        if featurename not in self.data:
            raise KeyError(featurename)
        if self._logger:
            self._logger("feature:remove", feature=featurename)
        del self.data[featurename]

    def __repr__(self):
        """
        Return string representation of the Features object.
        """
        return f"Features({self.data!r})"

    def __setitem__(self, featurename, featurevalue):
        """
        Set a feature with the given name to the given value.

        Args:
            featurename: feature name, must be string
            featurevalue:  feature value

        Raises:
            Exception: if the feature name is not a string.
        """
        if not isinstance(featurename, str):
            raise Exception(
                f"A feature name must be a string, not {type(featurename)}"
            )
        if self._logger:
            self._logger("feature:set", feature=featurename, value=featurevalue)
        self.data[featurename] = featurevalue

    def clear(self):
        """
        Remove all features.
        """
        if self._logger:
            self._logger("features:clear")
        self.data.clear()

    def copy(self, deep=False):
        """
        Return a shallow (or deep if deep=True) copy of the features. The result is another
        instance of Features which is detached from the owner and which does not log
        the changes. However, if the copy is shallow and feature values are references
        to mutable objects, they can still get modified in the original set (without
        any logging!).

        Args:
          deep: if True return a deep instead of a shallow copy of the features. (Default value = False)

        Returns:
          a new Features instance with the copied features and no change logger
        """
        ret = Features()
        # The data is assigned directly instead of going through __setitem__.
        if deep:
            # `deep` is only the flag; the copying itself is done by copy.deepcopy.
            ret.data = lib_copy.deepcopy(self.data)
        else:
            ret.data = self.data.copy()
        ret._logger = None
        return ret

    def to_dict(self, deepcopy=False, include_internal=False, memo=None):
        """
        Return a dictionary representation of the features. The returned dictionary is always a shallow
        copy of the original dictionary of features, but will be a deep copy if the parameter `deepcopy` is True.

        Note:
            Features with names that start with two underscores are considered "internal/transient" features
            and not saved. Features with names that start with one underscore are considered "internal" but
            do get saved/serialized.

        Args:
            deepcopy: if True, the dictionary is a deep copy so that mutable objects
                in the original are unaffected if they get modified in the copy. (Default value = False)
            include_internal: if True, all features, even those with names that start with double underscore ("__")
                are included in the result dictionary, these features are usually dropped.
            memo: if deepcopy is True, the memo object to use for deepcopy, if any

        Returns:
          the dict representation of the features

        """
        ret = {}
        for name, value in self.data.items():
            if not include_internal and name.startswith("__"):
                continue
            # Hand the caller's memo to deepcopy; with memo=None every value gets a fresh memo.
            ret[name] = lib_copy.deepcopy(value, memo) if deepcopy else value
        return ret

    @staticmethod
    def from_dict(thedict, deepcopy=False, memo=None):
        """
        Create a Features instance from a dictionary. If deepcopy is True, a deepcopy is created.

        NOTE: no checks are done to make sure that feature names are string only!

        Args:
          thedict: the dictionary from which to create the Features.
          deepcopy: if True, use a deep copy of the dictionary, otherwise a shallow
              copy (Default value = False)
          memo: if deepcopy is True, the memo object to use for deepcopying

        Returns:
          the Features instance

        """
        ret = Features()
        # Assigning to .data bypasses __setitem__, hence no name checks and no logging.
        if deepcopy:
            ret.data = lib_copy.deepcopy(thedict, memo=memo)
        else:
            ret.data = thedict.copy()
        return ret

    def names(self):
        """
        Returns a list of all feature names.
        """
        return list(self.keys())

Classes

class Features (*args, **kwargs)

A dict-like class for storing features, which are mappings from string feature names to arbitrary feature values. If the Features instance is a field in another object where changes are getting logged in a change log, it should pass on the logger, a method for logging feature changes. Any copy of an instance of Features will not receive the logger, in order to make sure that logging happens, the instance stored in the original owning object must be used.

Initialize a Features object.

Args

args
zero or one positional arguments which can be another Features object, a mapping, or an iterable of key value pairs.
_change_logger
a function for logging any changes to the feature map. This should be a method implemented in the owning object. It is called with the command name as the only positional argument and, depending on the command, the keyword arguments feature and value. NOTE: this is not related to the usual logging from the logging package, but for making use of tracking changes in a ChangeLog!
_deepcopy
if False (default) the dictionary wrapped by this Features object is a shallow copy of any original dictionary object. If True, all the values are deep-copied.
**kwargs
any number of additional keyword arguments which are used to set features.
Expand source code
class Features(UserDict):
    """
    A dict-like class for storing features, which are mappings from string feature names to
    arbitrary feature values. If the Features instance is a field in another object where
    changes are getting logged in a change log, it should pass on the logger, a method for
    logging feature changes. Any copy of an instance of Features will not receive the logger,
    in order to make sure that logging happens, the instance stored in the original owning
    object must be used.
    """

    def __init__(self, *args, _change_logger=None, _deepcopy=False, **kwargs):
        """
        Initialize a Features object.

        Args:
            args: zero or one positional arguments which can be another Features object, a mapping,
                or an iterable of key value pairs.
            _change_logger: a function for logging any changes to the feature map. This should be
                a method implemented in the owning object. It is called with the command name as
                the only positional argument and, depending on the command, the keyword arguments
                `feature` and `value`. NOTE: this is not related to the usual logging
                from the logging package, but for making use of tracking changes in a ChangeLog!
            _deepcopy: if False (default) the dictionary wrapped by this Features object is a shallow
                copy of any original dictionary object. If True, all the values are deep-copied.
            **kwargs: any number of additional keyword arguments which are used to set features.

        Raises:
            TypeError: if more than one positional argument is given.
        """
        if len(args) > 1:
            # Previously surplus positional arguments were dropped without any notice.
            raise TypeError(
                f"Features expected at most 1 positional argument, got {len(args)}"
            )
        # The logger has to exist before UserDict.__init__ runs: the initial features are
        # stored through __setitem__, which consults self._logger.
        self._logger = _change_logger
        kws = lib_copy.deepcopy(kwargs) if _deepcopy else kwargs
        if not args:
            super().__init__(**kws)
            return
        posarg = args[0]
        # Unwrap another Features instance so that a deep copy only touches the plain
        # feature dict and never the logger of the original instance.
        data = posarg.data if isinstance(posarg, Features) else posarg
        if _deepcopy:
            data = lib_copy.deepcopy(data)
        super().__init__(data, **kws)

    def __delitem__(self, featurename):
        """
        Remove the feature with the given feature name. This raises a key error if featurename is
        not in the Features. To silently remove a key, if it exists, use `pop(fname, None)`

        Args:
            featurename: name of the feature to remove

        Raises:
            KeyError: if there is no feature with that name; nothing is logged in that case.
        """
        # Check first, so that a removal which does not happen is not recorded by the logger.
        if featurename not in self.data:
            raise KeyError(featurename)
        if self._logger:
            self._logger("feature:remove", feature=featurename)
        del self.data[featurename]

    def __repr__(self):
        """
        Return string representation of the Features object.
        """
        return f"Features({self.data!r})"

    def __setitem__(self, featurename, featurevalue):
        """
        Set a feature with the given name to the given value.

        Args:
            featurename: feature name, must be string
            featurevalue:  feature value

        Raises:
            Exception: if the feature name is not a string.
        """
        if not isinstance(featurename, str):
            raise Exception(
                f"A feature name must be a string, not {type(featurename)}"
            )
        if self._logger:
            self._logger("feature:set", feature=featurename, value=featurevalue)
        self.data[featurename] = featurevalue

    def clear(self):
        """
        Remove all features.
        """
        if self._logger:
            self._logger("features:clear")
        self.data.clear()

    def copy(self, deep=False):
        """
        Return a shallow (or deep if deep=True) copy of the features. The result is another
        instance of Features which is detached from the owner and which does not log
        the changes. However, if the copy is shallow and feature values are references
        to mutable objects, they can still get modified in the original set (without
        any logging!).

        Args:
          deep: if True return a deep instead of a shallow copy of the features. (Default value = False)

        Returns:
          a new Features instance with the copied features and no change logger
        """
        ret = Features()
        # The data is assigned directly instead of going through __setitem__.
        if deep:
            # `deep` is only the flag; the copying itself is done by copy.deepcopy.
            ret.data = lib_copy.deepcopy(self.data)
        else:
            ret.data = self.data.copy()
        ret._logger = None
        return ret

    def to_dict(self, deepcopy=False, include_internal=False, memo=None):
        """
        Return a dictionary representation of the features. The returned dictionary is always a shallow
        copy of the original dictionary of features, but will be a deep copy if the parameter `deepcopy` is True.

        Note:
            Features with names that start with two underscores are considered "internal/transient" features
            and not saved. Features with names that start with one underscore are considered "internal" but
            do get saved/serialized.

        Args:
            deepcopy: if True, the dictionary is a deep copy so that mutable objects
                in the original are unaffected if they get modified in the copy. (Default value = False)
            include_internal: if True, all features, even those with names that start with double underscore ("__")
                are included in the result dictionary, these features are usually dropped.
            memo: if deepcopy is True, the memo object to use for deepcopy, if any

        Returns:
          the dict representation of the features

        """
        ret = {}
        for name, value in self.data.items():
            if not include_internal and name.startswith("__"):
                continue
            # Hand the caller's memo to deepcopy; with memo=None every value gets a fresh memo.
            ret[name] = lib_copy.deepcopy(value, memo) if deepcopy else value
        return ret

    @staticmethod
    def from_dict(thedict, deepcopy=False, memo=None):
        """
        Create a Features instance from a dictionary. If deepcopy is True, a deepcopy is created.

        NOTE: no checks are done to make sure that feature names are string only!

        Args:
          thedict: the dictionary from which to create the Features.
          deepcopy: if True, use a deep copy of the dictionary, otherwise a shallow
              copy (Default value = False)
          memo: if deepcopy is True, the memo object to use for deepcopying

        Returns:
          the Features instance

        """
        ret = Features()
        # Assigning to .data bypasses __setitem__, hence no name checks and no logging.
        if deepcopy:
            ret.data = lib_copy.deepcopy(thedict, memo=memo)
        else:
            ret.data = thedict.copy()
        return ret

    def names(self):
        """
        Returns a list of all feature names.
        """
        return list(self.keys())

Ancestors

  • collections.UserDict
  • collections.abc.MutableMapping
  • collections.abc.Mapping
  • collections.abc.Collection
  • collections.abc.Sized
  • collections.abc.Iterable
  • collections.abc.Container

Static methods

def from_dict(thedict, deepcopy=False, memo=None)

Create a Features instance from a dictionary. If deepcopy is True, a deepcopy is created.

NOTE: no checks are done to make sure that feature names are string only!

Args

thedict
the dictionary from which to create the Features.
deepcopy
if True, use a deep copy of the dictionary, otherwise a shallow copy (Default value = False)
memo
if deepcopy is True, the memo object to use for deepcopying

Returns

the Features instance

Expand source code
@staticmethod
def from_dict(thedict, deepcopy=False, memo=None):
    """
    Build a Features instance that wraps a copy of the given dictionary.

    NOTE: the copy is assigned to the wrapped dict directly, so feature names are
    not checked for being strings!

    Args:
      thedict: the dictionary from which to create the Features.
      deepcopy: if True, wrap a deep copy of the dictionary, otherwise a shallow
          copy (Default value = False)
      memo: if deepcopy is True, the memo object to use for deepcopying

    Returns:
      the Features instance

    """
    features = Features()
    features.data = (
        lib_copy.deepcopy(thedict, memo=memo) if deepcopy else thedict.copy()
    )
    return features

Methods

def clear(self)

Remove all features.

Expand source code
def clear(self):
    """
    Drop every feature, reporting "features:clear" to the change logger first if one is set.
    """
    log_change = self._logger
    if log_change:
        log_change("features:clear")
    self.data.clear()
def copy(self, deep=False)

Return a shallow (or deep if deep=True) copy of the features. The result is another instance of Features which is detached from the owner and which does not log the changes. However, if the copy is shallow and feature values are references to mutable objects, they can still get modified in the original set (without any logging!).

Args

deep
if True return a deep instead of a shallow copy of the features. (Default value = False)

Returns

a new Features instance with the copied features and no change logger

Expand source code
def copy(self, deep=False):
    """
    Return a shallow (or deep if deep=True) copy of the features. The result is another
    instance of Features which is detached from the owner and which does not log
    the changes. However, if the copy is shallow and feature values are references
    to mutable objects, they can still get modified in the original set (without
    any logging!).

    Args:
      deep: if True return a deep instead of a shallow copy of the features. (Default value = False)

    Returns:
      a new Features instance with the copied features and no change logger
    """
    ret = Features()
    # The data is assigned directly instead of going through __setitem__.
    if deep:
        # `deep` is only the flag; the copying itself is done by copy.deepcopy.
        ret.data = lib_copy.deepcopy(self.data)
    else:
        ret.data = self.data.copy()
    ret._logger = None
    return ret
def names(self)

Returns a list of all feature names.

Expand source code
def names(self):
    """
    Collect the names of all features into a new list.
    """
    return [*self.keys()]
def to_dict(self, deepcopy=False, include_internal=False, memo=None)

Return a dictionary representation of the features. The returned dictionary is always a shallow copy of the original dictionary of features, but will be a deep copy if the parameter deepcopy is True.

Note

Features with names that start with two underscores are considered "internal/transient" features and not saved. Features with names that start with one underscore are considered "internal" but do get saved/serialized.

Args

deepcopy
if True, the dictionary is a deep copy so that mutable objects in the original are unaffected if they get modified in the copy. (Default value = False)
include_internal
if True, all features, even those with names that start with double underscore ("__") are included in the result dictionary, these features are usually dropped.
memo
if deepcopy is True, the memo object to use for deepcopy, if any

Returns

the dict representation of the features

Expand source code
def to_dict(self, deepcopy=False, include_internal=False, memo=None):
    """
    Return a dictionary representation of the features. The returned dictionary is always a shallow
    copy of the original dictionary of features, but will be a deep copy if the parameter `deepcopy` is True.

    Note:
        Features with names that start with two underscores are considered "internal/transient" features
        and not saved. Features with names that start with one underscore are considered "internal" but
        do get saved/serialized.

    Args:
        deepcopy: if True, the dictionary is a deep copy so that mutable objects
            in the original are unaffected if they get modified in the copy. (Default value = False)
        include_internal: if True, all features, even those with names that start with double underscore ("__")
            are included in the result dictionary, these features are usually dropped.
        memo: if deepcopy is True, the memo object to use for deepcopy, if any

    Returns:
      the dict representation of the features

    """
    ret = {}
    for name, value in self.data.items():
        if not include_internal and name.startswith("__"):
            continue
        # Hand the caller's memo to deepcopy; with memo=None every value gets a fresh memo.
        ret[name] = lib_copy.deepcopy(value, memo) if deepcopy else value
    return ret