Source code for gatelfdata.featurengram
"""Module for the FeatureNGram class"""
import logging
import sys
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
streamhandler = logging.StreamHandler(stream=sys.stderr)
formatter = logging.Formatter(
'%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
streamhandler.setFormatter(formatter)
logger.addHandler(streamhandler)
[docs]class FeatureNgram(object):
"""Represents an ngram attribute. The value of such an attribute is a list/sequence of
things that can be represented by embeddings, """
def __init__(self, fname, attrinfo, featurestats, vocab):
"""Create the instance from the given meta info of an input feature"""
logger.debug("Creating FeatureNgram instance for fname/attrinfo=%r/%r", fname, attrinfo)
self.fname = fname
self.attrinfo = attrinfo
self.featurestats = featurestats
self.vocab = vocab
[docs] def type_converted(self):
"""Return the name of the type of information of the feature, once it is converted to
internal format."""
return "indexlist"
[docs] def type_original(self):
"""Return the name of the type of information of the original feature."""
return "ngram"
def __call__(self, value, normalize=None):
"""Convert a value of the expected type for this feature to a value that can be
fed into the corresponding input unit of the network"""
if normalize:
raise Exception("Normalization does not make sense for ngram features")
# ok, for an ngram we expect the value to be a list, in which case we
# create a new list with the string idices of the values
# otherwise, we report an error
if isinstance(value, list):
ret = [self.vocab.string2idx(v) for v in value]
return ret
else:
raise Exception("Value for converting FeatureNgram not a list but {} of type {}".format(value, type(value)))
def __str__(self):
return "FeatureNgram(name=%s)" % self.fname
def __repr__(self):
return "FeatureNgram(name=%r)" % self.fname