Module gatenlp.pam.matcher

Module for matchers to use with pampac patterns and elsewhere.

Expand source code
"""Module for matchers to use with pampac patterns and elsewhere."""

import re
from gatenlp.utils import init_logger

# Determine the classes of compiled patterns so matchers can recognise them with isinstance().
_tmp_re_pattern = re.compile("x")
CLASS_RE_PATTERN = _tmp_re_pattern.__class__
try:
    # The third-party "regex" package is optional.
    import regex
    _tmp_regex_pattern = regex.compile("x")
    CLASS_REGEX_PATTERN = _tmp_regex_pattern.__class__
except ImportError:
    # if the regex module is not available, make our code still work by introducing a dummy type
    class RegexPattern:   # pylint: disable=C0115
        pass
    CLASS_REGEX_PATTERN = RegexPattern  # pylint: disable=C0103
    # make style checker happy
    regex = None

logger = init_logger(debug=False)

# Tell pdoc to include the __call__ methods of the matcher classes in the generated documentation.
__pdoc__ = {
    "FeatureMatcher.__call__": True,
    "FeatureEqMatcher.__call__": True,
    "AnnMatcher.__call__": True,
}

class isIn:  # pylint: disable=C0103
    """
    Helper for use with the FeatureMatcher to check if a feature value is one of the
    values given to the constructor.
    """

    def __init__(self, *args, matchcase=True):
        """
        Create a callable that returns True if called with one of the given values.

        Args:
            *args: literal values to match against.
            matchcase: if True, values are compared exactly. If False, string values are
                compared by their upper case variants; non-string values are still
                compared exactly.
        """
        self.matchcase = matchcase
        if matchcase:
            self.vals = args
        else:
            # Upper-case only the strings; keep other values so they can still be matched.
            self.vals = [x.upper() if isinstance(x, str) else x for x in args]

    def __call__(self, value):
        """
        Check if the value is one of the values given to the constructor.

        Args:
            value: the value to check

        Returns:
            True if the value is among the stored values
        """
        # Only strings have an upper case variant; other values are compared as they are.
        if not self.matchcase and isinstance(value, str):
            value = value.upper()
        return value in self.vals

class FeatureMatcher:
    """
    Callable that matches the given dictionary against features.

    This creates a callable that can be used to easily check if features match the
    features and feature constraint defined by the matcher. When a matcher is created,
    the argument names are used as feature names and the argument values are either
    literal values to compare with or compiled regular expressions or callables.

    A FeatureMatcher matches as soon as all specified features match, no matter if the
    features we compare with contain additional features.

    In this example, the feature matcher will check if there are two features with the
    given names and values.

        fmatcher1 = FeatureMatcher(feature1 = "somevalue", feature2 = 999)
        if fmatcher1(ann.features):
            print("Yay, both features are in ann.features!")

    In this example `feature1` matches if it matches the regular expression, and feature2
    matches if the given callable returns true.

        def checksize(x):
            return 12 <= x < 33
        NAMEPATTERN = re.compile(r"[A-Z][a-z_0-9]+")
        fmatcher2 = FeatureMatcher(name = NAMEPATTERN, size = checksize)
    """

    def __init__(self, **kwargs):
        """
        Create a FeatureMatcher instance.

        Args:
            **kwargs: arbitrary key/value pairs to use for matching features.
        """
        self.featurematches = kwargs  # "featurematcher"

    def __call__(self, features):
        """
        Check if the passed features match the constraints for this FeatureMatcher.

        This returns true if all the constraints defined for this FeatureMatcher are satisfied,
        even if the features contain additional features not included in the constraints.

        Args:
            features: the features to check

        Returns:
            True if the feature constraints are satisfied
        """
        # All constrained features must be present before any constraint gets evaluated.
        if any(fmn not in features for fmn in self.featurematches):
            return False
        for fmn, fmv in self.featurematches.items():  # "featurematchername"/"featurematchervalue"
            feature = features[fmn]
            if callable(fmv):
                if not fmv(feature):
                    return False
            elif isinstance(fmv, (CLASS_RE_PATTERN, CLASS_REGEX_PATTERN)):
                # Patterns are matched at the start of the string form of the feature value.
                if not fmv.match(str(feature)):
                    return False
            else:
                # Literal constraints are compared through their string representations.
                if str(fmv) != str(feature):
                    return False
        return True

class FeatureEqMatcher:
    """
    Callable that matches the given dictionary against features and returns True only if all
    features match and there are no additional features.

    This works like FeatureMatcher, but all the features that get checked must satisfy the
    constraints and there must be no additional features.
    """

    def __init__(self, **kwargs):
        """
        Create a FeatureEqMatcher instance.

        Args:
            **kwargs: arbitrary key/value pairs to use for matching features.
        """
        self.featurematches = kwargs
        # The constraint checks themselves are delegated to a FeatureMatcher.
        self._fm = FeatureMatcher(**kwargs)

    def __call__(self, features):
        """
        Check if the passed features match the constraints for this FeatureEqMatcher.

        This returns true only if all the constraints defined for this FeatureEqMatcher are
        satisfied and the features do not contain any feature not included in the constraints.

        Args:
            features: the features to check

        Returns:
            True if the feature constraints are satisfied
        """
        # Reject any feature that has no corresponding constraint.
        if any(feat not in self.featurematches for feat in features.keys()):
            return False
        if not self._fm(features):
            return False
        return True

class AnnMatcher:
    """
    A callable that matches an annotation.

    This creates a callable that can be used to check if an annotation satisfies all the
    constraints defined.
    """

    def __init__(self, type=None, features=None, features_eq=None, text=None):  # pylint: disable=W0622
        """
        Create an AnnMatcher instance.

        Args:
            type: if not None, match the type. If this is a string, match the literal string, if it is
                a compiled regular expression, match that expression, if it is a callable, call it and
                pass the type and use the return value as a boolean indicating if the type is a match.
            features: if specified, it must be a callable (e.g. a FeatureMatcher) or a dictionary which
                is used as the kwargs to create a FeatureMatcher instance for matching the features
                of the annotation.
            features_eq: if specified, it must be a callable (e.g. a FeatureEqMatcher) or a dictionary
                which is used as the kwargs to create a FeatureEqMatcher instance for matching the
                features of the annotation. Only one of features or features_eq should be used;
                if both are given, features_eq takes precedence.
            text: if not None, match the document text covered by the annotation. This can be a
                literal string, a callable or a compiled regular expression. For this the
                matcher must be called with the optional `doc` parameter.
        """
        self.type = type
        if features_eq is not None:
            if callable(features_eq):
                self.features_matcher = features_eq
            else:
                self.features_matcher = FeatureEqMatcher(**features_eq)
        elif features is not None:
            if callable(features):
                self.features_matcher = features
            else:
                self.features_matcher = FeatureMatcher(**features)
        else:
            self.features_matcher = None
        self.text = text

    # pylint: disable=R0911,R0912
    def __call__(self, ann, doc=None):
        """
        Check if the annotation matches.

        Args:
            ann: the annotation to check
            doc: the document the annotation refers to, only needed if the matcher contains a "text"
                constraint. The covered text is obtained as `doc[ann]`.

        Returns:
            True if the annotation matches, False otherwise.
        """
        if self.type is not None:
            if isinstance(self.type, str):
                if self.type != ann.type:
                    return False
            elif callable(self.type):
                if not self.type(ann.type):
                    return False
            elif isinstance(self.type, (CLASS_RE_PATTERN, CLASS_REGEX_PATTERN)):
                if not self.type.match(ann.type):
                    return False
            else:
                # Any other literal is compared by its string form with the annotation type
                # (comparing it with self.type itself could never succeed).
                if str(self.type) != ann.type:
                    return False
        if self.features_matcher is not None:
            if not self.features_matcher(ann.features):
                return False
        if self.text is not None:
            # A text constraint of an unsupported kind is ignored, as before.
            if isinstance(self.text, str):
                if self.text != doc[ann]:
                    return False
            elif callable(self.text):
                if not self.text(doc[ann]):
                    return False
            elif isinstance(self.text, (CLASS_RE_PATTERN, CLASS_REGEX_PATTERN)):
                if not self.text.match(doc[ann]):
                    return False
        return True

# Helpers for the Feature and Ann matchers: these are callables which provide a simple way to match
# text case insensitive or negate matching text or features

class Nocase:
    """
    A matcher for comparing text in a case insensitive way.

    This carries out the matching by using the upper-case versions of the text compared and the
    text to compare with. This makes sure that cases like German "ß" which expands to "SS" are
    handled correctly (while uppercase "SS" often should NOT get converted to lowercase "ß").

        m1 = Nocase("sometext")
        assert m1("SomeText")
        assert m1("SOMETEXT")
    """

    def __init__(self, text):
        """
        Create a case insensitive text matcher.

        Args:
            text: the text to match against.
        """
        # Store the upper case variant once so each call only converts its own argument.
        self.text = text.upper()

    def __call__(self, text):
        """
        Check if the text matches.

        Args:
            text: the text to check

        Returns:
            True if the text matches
        """
        return text.upper() == self.text

class IfNot:
    """
    A matcher that returns the negation of another matcher.

        m1 = FeatureMatcher(f1="x", f2=22)
        m2 = IfNot(m1)  # m2 matches whenever m1 does not match
    """

    def __init__(self, other):
        """
        Create a matcher that negates another matcher.

        Args:
            other: the callable whose result should get negated.
        """
        self.other = other

    def __call__(self, *args, **kwargs):
        """
        Call the wrapped matcher with the given arguments and negate its result.

        Args:
            *args: positional arguments passed on to the wrapped matcher
            **kwargs: keyword arguments passed on to the wrapped matcher

        Returns:
            True if the wrapped matcher returns a false value, False otherwise
        """
        return not self.other(*args, **kwargs)


class AnnMatcher (type=None, features=None, features_eq=None, text=None)

A callable that matches an annotation.

This creates a callable that can be used to check if an annotation satisfies all the constraints defined.

Create an AnnMatcher instance.


if not None, match the type. If this is a string, match the literal string, if it is a compiled regular expression, match that expression, if it is a callable, call it and pass the type and use the return value as a boolean indicating if the type is a match.
if specified, it must be a FeatureMatcher or a dictionary which is used as the kwargs to create a FeatureMatcher instance for matching the features of the annotation.
if specified, it must be a FeatureEqMatcher or a dictionary which is used as the kwargs to create a FeatureEqMatcher instance for matching the features of the annotation. Only one of features or features_eq should be used.
if not None, match the document text covered by the annotation. For this the matcher must be called with the optional doc parameter.
Expand source code
class AnnMatcher:
    """
    A callable that matches an annotation.

    This creates a callable that can be used to check if an annotation satisfies all the
    constraints defined.
    """

    def __init__(self, type=None, features=None, features_eq=None, text=None):  # pylint: disable=W0622
        """
        Create an AnnMatcher instance.

        Args:
            type: if not None, match the type. If this is a string, match the literal string, if it is
                a compiled regular expression, match that expression, if it is a callable, call it and
                pass the type and use the return value as a boolean indicating if the type is a match.
            features: if specified, it must be a callable (e.g. a FeatureMatcher) or a dictionary which
                is used as the kwargs to create a FeatureMatcher instance for matching the features
                of the annotation.
            features_eq: if specified, it must be a callable (e.g. a FeatureEqMatcher) or a dictionary
                which is used as the kwargs to create a FeatureEqMatcher instance for matching the
                features of the annotation. Only one of features or features_eq should be used;
                if both are given, features_eq takes precedence.
            text: if not None, match the document text covered by the annotation. This can be a
                literal string, a callable or a compiled regular expression. For this the
                matcher must be called with the optional `doc` parameter.
        """
        self.type = type
        if features_eq is not None:
            if callable(features_eq):
                self.features_matcher = features_eq
            else:
                self.features_matcher = FeatureEqMatcher(**features_eq)
        elif features is not None:
            if callable(features):
                self.features_matcher = features
            else:
                self.features_matcher = FeatureMatcher(**features)
        else:
            self.features_matcher = None
        self.text = text

    # pylint: disable=R0911,R0912
    def __call__(self, ann, doc=None):
        """
        Check if the annotation matches.

        Args:
            ann: the annotation to check
            doc: the document the annotation refers to, only needed if the matcher contains a "text"
                constraint. The covered text is obtained as `doc[ann]`.

        Returns:
            True if the annotation matches, False otherwise.
        """
        if self.type is not None:
            if isinstance(self.type, str):
                if self.type != ann.type:
                    return False
            elif callable(self.type):
                if not self.type(ann.type):
                    return False
            elif isinstance(self.type, (CLASS_RE_PATTERN, CLASS_REGEX_PATTERN)):
                if not self.type.match(ann.type):
                    return False
            else:
                # Any other literal is compared by its string form with the annotation type
                # (comparing it with self.type itself could never succeed).
                if str(self.type) != ann.type:
                    return False
        if self.features_matcher is not None:
            if not self.features_matcher(ann.features):
                return False
        if self.text is not None:
            # A text constraint of an unsupported kind is ignored, as before.
            if isinstance(self.text, str):
                if self.text != doc[ann]:
                    return False
            elif callable(self.text):
                if not self.text(doc[ann]):
                    return False
            elif isinstance(self.text, (CLASS_RE_PATTERN, CLASS_REGEX_PATTERN)):
                if not self.text.match(doc[ann]):
                    return False
        return True


def __call__(self, ann, doc=None)

Check if the annotation matches.


the annotation to check
the document the annotation refers to, only needed if the matcher contains a "text" constraint.


True if the annotation matches, False otherwise.

Expand source code
def __call__(self, ann, doc=None):
    """
    Check if the annotation matches.

    Args:
        ann: the annotation to check
        doc: the document the annotation refers to, only needed if the matcher contains a "text"
            constraint. The covered text is obtained as `doc[ann]`.

    Returns:
        True if the annotation matches, False otherwise.
    """
    if self.type is not None:
        if isinstance(self.type, str):
            if self.type != ann.type:
                return False
        elif callable(self.type):
            if not self.type(ann.type):
                return False
        elif isinstance(self.type, (CLASS_RE_PATTERN, CLASS_REGEX_PATTERN)):
            if not self.type.match(ann.type):
                return False
        else:
            # Any other literal is compared by its string form with the annotation type
            # (comparing it with self.type itself could never succeed).
            if str(self.type) != ann.type:
                return False
    if self.features_matcher is not None:
        if not self.features_matcher(ann.features):
            return False
    if self.text is not None:
        # A text constraint of an unsupported kind is ignored, as before.
        if isinstance(self.text, str):
            if self.text != doc[ann]:
                return False
        elif callable(self.text):
            if not self.text(doc[ann]):
                return False
        elif isinstance(self.text, (CLASS_RE_PATTERN, CLASS_REGEX_PATTERN)):
            if not self.text.match(doc[ann]):
                return False
    return True
class FeatureEqMatcher (**kwargs)

Callable that matches the given dictionary against features and returns True only if all features match and there are no additional features.

This works like FeatureMatcher, but all the features that get checked must satisfy the constraints and there must be no additional features.

Create a FeatureEqMatcher instance.


arbitrary key/value pairs to use for matching features.
Expand source code
class FeatureEqMatcher:
    """
    Callable that matches the given dictionary against features and returns True only if all
    features match and there are no additional features.

    This works like FeatureMatcher, but all the features that get checked must satisfy the
    constraints and there must be no additional features.
    """

    def __init__(self, **kwargs):
        """
        Create a FeatureEqMatcher instance.

        Args:
            **kwargs: arbitrary key/value pairs to use for matching features.
        """
        self.featurematches = kwargs
        # The constraint checks themselves are delegated to a FeatureMatcher.
        self._fm = FeatureMatcher(**kwargs)

    def __call__(self, features):
        """
        Check if the passed features match the constraints for this FeatureEqMatcher.

        This returns true only if all the constraints defined for this FeatureEqMatcher are
        satisfied and the features do not contain any feature not included in the constraints.

        Args:
            features: the features to check

        Returns:
            True if the feature constraints are satisfied
        """
        # Reject any feature that has no corresponding constraint.
        if any(feat not in self.featurematches for feat in features.keys()):
            return False
        if not self._fm(features):
            return False
        return True


def __call__(self, features)

Check if the passed features match the constraints for this FeatureMatcher.

This returns true only if all the constraints defined for this FeatureEqMatcher are satisfied and the features do not contain any additional features not included in the constraints.


the features to check


True if the feature constraints are satisfied

Expand source code
def __call__(self, features):
    """
    Check if the passed features match the constraints for this FeatureEqMatcher.

    This returns true only if all the constraints defined for this FeatureEqMatcher are
    satisfied and the features do not contain any feature not included in the constraints.

    Args:
        features: the features to check

    Returns:
        True if the feature constraints are satisfied
    """
    # Reject any feature that has no corresponding constraint.
    if any(feat not in self.featurematches for feat in features.keys()):
        return False
    if not self._fm(features):
        return False
    return True
class FeatureMatcher (**kwargs)

Callable that matches the given dictionary against features.

This creates a callable that can be used to easily check if features match the features and feature constraint defined by the matcher. When a matcher is created, the argument names are used as feature names and the argument values are either literal values to compare with or compiled regular expressions or callables.

A FeatureMatcher matches as soon as all specified features match, no matter if the features we compare with contain additional features.

In this example, the feature matcher will check if there are two features with the given names and values.


fmatcher1 = FeatureMatcher(feature1 = "somevalue", feature2 = 999)
if fmatcher1(ann.features):
    print("Yay, both features are in ann.features!")

In this example feature1 matches if it matches the regular expression, and feature2 matches if the given callable returns true.


def checksize(x):
    return 12 <= x < 33
NAMEPATTERN = re.compile(r"[A-Z][a-z_0-9]+")
fmatcher2 = FeatureMatcher(name = NAMEPATTERN, size = checksize)

Create a FeatureMatcher instance.


arbitrary key/value pairs to use for matching features.
Expand source code
class FeatureMatcher:
    """
    Callable that matches the given dictionary against features.

    This creates a callable that can be used to easily check if features match the
    features and feature constraint defined by the matcher. When a matcher is created,
    the argument names are used as feature names and the argument values are either
    literal values to compare with or compiled regular expressions or callables.

    A FeatureMatcher matches as soon as all specified features match, no matter if the
    features we compare with contain additional features.

    In this example, the feature matcher will check if there are two features with the
    given names and values.

        fmatcher1 = FeatureMatcher(feature1 = "somevalue", feature2 = 999)
        if fmatcher1(ann.features):
            print("Yay, both features are in ann.features!")

    In this example `feature1` matches if it matches the regular expression, and feature2
    matches if the given callable returns true.

        def checksize(x):
            return 12 <= x < 33
        NAMEPATTERN = re.compile(r"[A-Z][a-z_0-9]+")
        fmatcher2 = FeatureMatcher(name = NAMEPATTERN, size = checksize)
    """

    def __init__(self, **kwargs):
        """
        Create a FeatureMatcher instance.

        Args:
            **kwargs: arbitrary key/value pairs to use for matching features.
        """
        self.featurematches = kwargs  # "featurematcher"

    def __call__(self, features):
        """
        Check if the passed features match the constraints for this FeatureMatcher.

        This returns true if all the constraints defined for this FeatureMatcher are satisfied,
        even if the features contain additional features not included in the constraints.

        Args:
            features: the features to check

        Returns:
            True if the feature constraints are satisfied
        """
        # All constrained features must be present before any constraint gets evaluated.
        if any(fmn not in features for fmn in self.featurematches):
            return False
        for fmn, fmv in self.featurematches.items():  # "featurematchername"/"featurematchervalue"
            feature = features[fmn]
            if callable(fmv):
                if not fmv(feature):
                    return False
            elif isinstance(fmv, (CLASS_RE_PATTERN, CLASS_REGEX_PATTERN)):
                # Patterns are matched at the start of the string form of the feature value.
                if not fmv.match(str(feature)):
                    return False
            else:
                # Literal constraints are compared through their string representations.
                if str(fmv) != str(feature):
                    return False
        return True


def __call__(self, features)

Check if the passed features match the constraints for this FeatureMatcher.

This returns true if all the constraints defined for this FeatureMatcher are satisfied, even if the features contain additional features not included in the constraints.


the features to check


True if the feature constraints are satisfied

Expand source code
def __call__(self, features):
    """
    Check if the passed features match the constraints for this FeatureMatcher.

    This returns true if all the constraints defined for this FeatureMatcher are satisfied,
    even if the features contain additional features not included in the constraints.

    Args:
        features: the features to check

    Returns:
        True if the feature constraints are satisfied
    """
    # All constrained features must be present before any constraint gets evaluated.
    if any(fmn not in features for fmn in self.featurematches):
        return False
    for fmn, fmv in self.featurematches.items():  # "featurematchername"/"featurematchervalue"
        feature = features[fmn]
        if callable(fmv):
            if not fmv(feature):
                return False
        elif isinstance(fmv, (CLASS_RE_PATTERN, CLASS_REGEX_PATTERN)):
            # Patterns are matched at the start of the string form of the feature value.
            if not fmv.match(str(feature)):
                return False
        else:
            # Literal constraints are compared through their string representations.
            if str(fmv) != str(feature):
                return False
    return True
class IfNot (other)

A matcher that returns the negation of another matcher.


m1 = FeatureMatcher(f1="x", f2=22)
m2 = IfNot(m1)  # m2 matches whenever m1 does not match, i.e. unless both f1="x" and f2=22 are present
Expand source code
class IfNot:
    """
    A matcher that returns the negation of another matcher.

        m1 = FeatureMatcher(f1="x", f2=22)
        m2 = IfNot(m1)  # m2 matches whenever m1 does not match
    """

    def __init__(self, other):
        """
        Create a matcher that negates another matcher.

        Args:
            other: the callable whose result should get negated.
        """
        self.other = other

    def __call__(self, *args, **kwargs):
        """
        Call the wrapped matcher with the given arguments and negate its result.

        Args:
            *args: positional arguments passed on to the wrapped matcher
            **kwargs: keyword arguments passed on to the wrapped matcher

        Returns:
            True if the wrapped matcher returns a false value, False otherwise
        """
        return not self.other(*args, **kwargs)
class Nocase (text)

A matcher for comparing text in a case insensitive way.

This carries out the matching by using the upper-case versions of the text compared and the text to compare with. This makes sure that cases like German "ß" which expands to "SS" are handled correctly (while uppercase "SS" often should NOT get converted to lowercase "ß").


m1 = Nocase("sometext")
assert m1("SomeText")
assert m1("SOMETEXT")

Create a case insensitive text matcher.


the text to match against.
Expand source code
class Nocase:
    """
    A matcher for comparing text in a case insensitive way.

    This carries out the matching by using the upper-case versions of the text compared and the
    text to compare with. This makes sure that cases like German "ß" which expands to "SS" are
    handled correctly (while uppercase "SS" often should NOT get converted to lowercase "ß").

        m1 = Nocase("sometext")
        assert m1("SomeText")
        assert m1("SOMETEXT")
    """

    def __init__(self, text):
        """
        Create a case insensitive text matcher.

        Args:
            text: the text to match against.
        """
        # Store the upper case variant once so each call only converts its own argument.
        self.text = text.upper()

    def __call__(self, text):
        """
        Check if the text matches.

        Args:
            text: the text to check

        Returns:
            True if the text matches
        """
        return text.upper() == self.text

Compiled regex object

Instance variables

var flags

The regex matching flags.

var groupindex

A dictionary mapping group names to group numbers.

var groups

The number of capturing groups in the pattern.

var named_lists

The named lists used by the regex.

var pattern

The pattern string from which the regex object was compiled.


def findall(string, pos=None, endpos=None, overlapped=False, concurrent=None, timeout=None)

findall(string, pos=None, endpos=None, overlapped=False, concurrent=None, timeout=None) –> list. Return a list of all matches of pattern in string. The matches may be overlapped if overlapped is True.

def finditer(string, pos=None, endpos=None, overlapped=False, concurrent=None, timeout=None)

finditer(string, pos=None, endpos=None, overlapped=False, concurrent=None, timeout=None) –> iterator. Return an iterator over all matches for the RE pattern in string. The matches may be overlapped if overlapped is True. For each match, the iterator returns a MatchObject.

def fullmatch(string, pos=None, endpos=None, concurrent=None, timeout=None)

fullmatch(string, pos=None, endpos=None, concurrent=None, timeout=None) –> MatchObject or None. Match zero or more characters against all of the string.

def match(string, pos=None, endpos=None, concurrent=None, timeout=None)

match(string, pos=None, endpos=None, concurrent=None, timeout=None) –> MatchObject or None. Match zero or more characters at the beginning of the string.

def scanner(string, pos=None, endpos=None, overlapped=False, concurrent=None, timeout=None)

scanner(string, pos=None, endpos=None, overlapped=False, concurrent=None, timeout=None) –> scanner. Return a scanner for the RE pattern in string. The matches may be overlapped if overlapped is True.

def search(string, pos=None, endpos=None, concurrent=None, timeout=None)

search(string, pos=None, endpos=None, concurrent=None, timeout=None) –> MatchObject or None. Search through string looking for a match, and return a corresponding match object instance. Return None if no match is found.

def split(string, maxsplit=0, concurrent=None, timeout=None)

split(string, maxsplit=0, concurrent=None, timeout=None) –> list. Split string by the occurrences of pattern.

def splititer(string, maxsplit=0, concurrent=None, timeout=None)

splititer(string, maxsplit=0, concurrent=None, timeout=None) –> iterator. Return an iterator yielding the parts of a split string.

def sub(repl, string, count=0, flags=0, pos=None, endpos=None, concurrent=None, timeout=None)

sub(repl, string, count=0, flags=0, pos=None, endpos=None, concurrent=None, timeout=None) –> newstring Return the string obtained by replacing the leftmost (or rightmost with a reverse pattern) non-overlapping occurrences of pattern in string by the replacement repl.

def subf(format, string, count=0, flags=0, pos=None, endpos=None, concurrent=None, timeout=None)

subf(format, string, count=0, flags=0, pos=None, endpos=None, concurrent=None, timeout=None) –> newstring Return the string obtained by replacing the leftmost (or rightmost with a reverse pattern) non-overlapping occurrences of pattern in string by the replacement format.

def subfn(...)

subfn(format, string, count=0, flags=0, pos=None, endpos=None, concurrent=None, timeout=None) –> (newstring, number of subs) Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost (or rightmost with a reverse pattern) non-overlapping occurrences of pattern with the replacement format.

def subn(...)

subn(repl, string, count=0, flags=0, pos=None, endpos=None, concurrent=None, timeout=None) –> (newstring, number of subs) Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost (or rightmost with a reverse pattern) non-overlapping occurrences of pattern with the replacement repl.

class isIn (*args, matchcase=True)

Helper for use with the Feature matcher to check if a feature value is one of the values given to the constructor.

Literal values to compare against. The created callable returns true if called with one of these values.


values to match against.
whether string values should get matched with exact case; if False, the upper case variants of the strings are used for matching
Expand source code
class isIn:  # pylint: disable=C0103
    """
    Helper for use with the FeatureMatcher to check if a feature value is one of the
    values given to the constructor.
    """

    def __init__(self, *args, matchcase=True):
        """
        Create a callable that returns True if called with one of the given values.

        Args:
            *args: literal values to match against.
            matchcase: if True, values are compared exactly. If False, string values are
                compared by their upper case variants; non-string values are still
                compared exactly.
        """
        self.matchcase = matchcase
        if matchcase:
            self.vals = args
        else:
            # Upper-case only the strings; keep other values so they can still be matched.
            self.vals = [x.upper() if isinstance(x, str) else x for x in args]

    def __call__(self, value):
        """
        Check if the value is one of the values given to the constructor.

        Args:
            value: the value to check

        Returns:
            True if the value is among the stored values
        """
        # Only strings have an upper case variant; other values are compared as they are.
        if not self.matchcase and isinstance(value, str):
            value = value.upper()
        return value in self.vals