Module gatenlp.pam.matcher
Module for matchers to use with pampac patterns and elsewhere.
Expand source code
"""
Module for matchers to use with pampac patterns and elsewhere.
"""
import re
from gatenlp.utils import init_logger
_tmp_re_pattern = re.compile("x")
CLASS_RE_PATTERN = _tmp_re_pattern.__class__
try:
import regex
_tmp_regex_pattern = regex.compile("x")
CLASS_REGEX_PATTERN = _tmp_regex_pattern.__class__
except ImportError:
# if the regex module is not available, make our code still work by introducing a dummy type
class RegexPattern: # pylint: disable=C0115
pass
CLASS_REGEX_PATTERN = RegexPattern # pylint: disable=C0103
# make style checker happy
regex = None
logger = init_logger(debug=False)
__pdoc__ = {
"FeatureMatcher.__call__": True,
"FeatureEqMatcher.__call__": True,
"AnnMatcher.__call__": True,
}
class isIn: # pylint: disable=C0103
"""
Helper for use with the Feature matcher to check if a feature value is one of the
values given to the constructor.
"""
def __init__(self, *args, matchcase=True):
"""
Literal values to compare against. The created callable returns true if called with
one of these values.
Args:
*args: values to match against.
matchcase: if string values should get matched with exact case or not if False, uses
the upper case variant for matching
"""
self.matchcase = matchcase
if not matchcase:
self.vals = [x.upper() for x in args if isinstance(x, str)]
else:
self.vals = args
def __call__(self, value):
if not self.matchcase:
value = value.upper()
return value in self.vals
class FeatureMatcher:
"""
Callable that matches the given dictionary against features.
This creates a callable that can be used to easily check if features match the
features and feature constraint defined by the matcher. When a matcher is created,
the argument names are used as feature names and the argument values are either
literal values to compare with or compiled regular expressions or callables.
A FeatureMatcher matches as soon as all specified features match, no matter if the
features we compare with contain additional features.
In this example, the feature matcher will check if there are two features with the
given names and values.
Example:
```python
fmatcher1 = FeatureMatcher(feature1 = "somevalue", feature2 = 999)
if fmatcher(ann.features):
print("Yay, both features are in ann.features!")
```
In this example `feature1` matches if it matches the regular expression, and feature2
matches if the given callable returns true.
Example:
```python
def checksize(x):
return 12 <= x < 33
NAMEPATTERN = re.compile(r"[A-Z][a-z_0-9]+")
fmatcher2 = FeatureMatcher(name = NAMEPATTERN, size = checksize)
```
"""
def __init__(self, **kwargs):
"""
Create a FeatureMatcher instance.
Args:
**kwargs: arbitrary key/value pairs to use for matching features.
"""
self.featurematches = kwargs # "featurematcher"
def __call__(self, features):
"""
Check if the passed features match the constraints for this FeatureMatcher.
This returns true if all the constraints defined for this FeatureMatcher are satisfied,
even if the features contain additional features not included in the constraints.
Args:
features: the features to check
Returns:
True if the feature constraints are satisfied
"""
for fmn in self.featurematches: # "featurematchername"
if fmn not in features:
return False
for fmn, fmv in self.featurematches.items(): # "featurematchername"/"featurematchervalue"
feature = features[fmn]
if callable(fmv):
if not fmv(feature):
return False
elif isinstance(fmv, (CLASS_RE_PATTERN, CLASS_REGEX_PATTERN)):
fstr = str(feature)
if not fmv.match(fstr):
return False
else:
fstr = str(feature)
tmp = str(fmv)
if tmp != fstr:
return False
return True
class FeatureEqMatcher:
"""
Callable that matches the given dictionary against features and returns True only if all features
match and there are no additional features.
This works like FeatureMatcher, but all the features that get checked must satisfy the constraints
and there must be no additional features.
"""
def __init__(self, **kwargs):
"""
Create a FeatureEqMatcher instance.
Args:
**kwargs: arbitrary key/value pairs to use for matching features.
"""
self.featurematches = kwargs
self._fm = FeatureMatcher(**kwargs)
def __call__(self, features):
"""
Check if the passed features match the constraints for this FeatureMatcher.
This returns true if all the constraints defined for this FeatureMatcher are satisfied,
ONLY if the features do not contain additional features not included in the constraints.
Args:
features: the features to check
Returns:
True if the feature constraints are satisfied
"""
for feat in features.keys():
if feat not in self.featurematches:
return False
if not self._fm(features):
return False
return True
class AnnMatcher:
"""
A callable that matches an annotation.
This creates a callable that can be used to check if an annotation satisfies all the constraints
defined.
"""
def __init__(self, type=None, features=None, features_eq=None, text=None): # pylint: disable=W0622
"""
Create an AnnMatcher instance.
Args:
type: if not None, match the type. If this is a string, match the literal string, if it is
a compiled regular expression, match that expression, if it is a callable, call it and
pass the type and use the return value as a boolean indicating if the type is a match.
features: if specified, it must be a FeatureMatcher or a dictionary which is used as the kwargs to create
a FeatureMatcher instance for matching the features of the annotation.
features_eq: if specified, it must be a FeatureEqMatcher or a dictionary which is used as the kwargs
to create a FeatureEqMatcher instance for matching the features of the annotation.
Only one of features or features_eq should be used.
text: if not None, match the document text covered by the annotation. For this the
matcher must be called with the optional `doc` parameter.
"""
self.type = type
if features_eq is not None:
if callable(features_eq):
self.features_matcher = features_eq
else:
self.features_matcher = FeatureEqMatcher(**features_eq)
elif features is not None:
if callable(features):
self.features_matcher = features
else:
self.features_matcher = FeatureMatcher(**features)
else:
self.features_matcher = None
self.text = text
# pylint: disable=R0911,R0912
def __call__(self, ann, doc=None):
"""
Check if the annotation matches.
Args:
ann: the annotation to check
doc: the document the annotation refers to, only needed if the matcher contains a "text"
constraint.
Returns:
True if the annotation matches, False otherwise.
"""
if self.type is not None:
if isinstance(self.type, str):
if self.type != ann.type:
return False
elif callable(self.type):
if not self.type(ann.type):
return False
elif isinstance(self.type, (CLASS_RE_PATTERN, CLASS_REGEX_PATTERN)):
if not self.type.match(ann.type):
return False
else:
tmp = str(self.type)
if tmp != self.type:
return False
if self.features_matcher is not None:
if not self.features_matcher(ann.features):
return False
if self.text is not None:
if isinstance(self.text, (CLASS_RE_PATTERN, CLASS_REGEX_PATTERN)):
if not self.text.match(doc[ann]):
return False
return True
# Helpers for the Feature and Ann matchers: these are callables which provide a simple way to match
# text case insensitive or negate matching text or features
class Nocase:
"""
A matcher for comparing text in a case insensitive way.
This carries out the matching by using the upper-case versions of the text compared and the
text to compare with. This makes sure that cases like German "ß" which expands to "SS" are
handled correctly (while uppercase "SS" often should NOT get converted to lowercase "ß").
Example:
```python
m1 = Nocase("sometext")
assert m1("SomeText")
assert m1("SOMETEXT")
```
"""
def __init__(self, text):
"""
Create a case insensitive text matcher.
Args:
text: the text to match against.
"""
self.text = text.upper()
def __call__(self, text):
"""
Check if the text matches.
Args:
text: the text to check
Returns:
True if the text matches
"""
return text.upper() == self.text
class IfNot:
"""
A matcher that returns the negation of another matcher.
Example:
```python
m1 = FeatureMatcher(f1="x", f2=22)
m2 = IfNot(m1) # m2 matches for features which do not contain f1="x" and not f2=22
```
"""
def __init__(self, other):
self.other = other
def __call__(self, *args, **kwargs):
return not self.other(*args, **kwargs)
Classes
class AnnMatcher (type=None, features=None, features_eq=None, text=None)
-
A callable that matches an annotation.
This creates a callable that can be used to check if an annotation satisfies all the constraints defined.
Create an AnnMatcher instance.
Args
type
- if not None, match the type. If this is a string, match the literal string, if it is a compiled regular expression, match that expression, if it is a callable, call it and pass the type and use the return value as a boolean indicating if the type is a match.
features
- if specified, it must be a FeatureMatcher or a dictionary which is used as the kwargs to create a FeatureMatcher instance for matching the features of the annotation.
features_eq
- if specified, it must be a FeatureEqMatcher or a dictionary which is used as the kwargs to create a FeatureEqMatcher instance for matching the features of the annotation. Only one of features or features_eq should be used.
text
- if not None, match the document text covered by the annotation. For this the
matcher must be called with the optional
doc
parameter.
Expand source code
class AnnMatcher: """ A callable that matches an annotation. This creates a callable that can be used to check if an annotation satisfies all the constraints defined. """ def __init__(self, type=None, features=None, features_eq=None, text=None): # pylint: disable=W0622 """ Create an AnnMatcher instance. Args: type: if not None, match the type. If this is a string, match the literal string, if it is a compiled regular expression, match that expression, if it is a callable, call it and pass the type and use the return value as a boolean indicating if the type is a match. features: if specified, it must be a FeatureMatcher or a dictionary which is used as the kwargs to create a FeatureMatcher instance for matching the features of the annotation. features_eq: if specified, it must be a FeatureEqMatcher or a dictionary which is used as the kwargs to create a FeatureEqMatcher instance for matching the features of the annotation. Only one of features or features_eq should be used. text: if not None, match the document text covered by the annotation. For this the matcher must be called with the optional `doc` parameter. """ self.type = type if features_eq is not None: if callable(features_eq): self.features_matcher = features_eq else: self.features_matcher = FeatureEqMatcher(**features_eq) elif features is not None: if callable(features): self.features_matcher = features else: self.features_matcher = FeatureMatcher(**features) else: self.features_matcher = None self.text = text # pylint: disable=R0911,R0912 def __call__(self, ann, doc=None): """ Check if the annotation matches. Args: ann: the annotation to check doc: the document the annotation refers to, only needed if the matcher contains a "text" constraint. Returns: True if the annotation matches, False otherwise. """ if self.type is not None: if isinstance(self.type, str): if self.type != ann.type: return False elif callable(self.type): if not self.type(ann.type): return False elif isinstance(self.type, (CLASS_RE_PATTERN, CLASS_REGEX_PATTERN)): if not self.type.match(ann.type): return False else: tmp = str(self.type) if tmp != self.type: return False if self.features_matcher is not None: if not self.features_matcher(ann.features): return False if self.text is not None: if isinstance(self.text, (CLASS_RE_PATTERN, CLASS_REGEX_PATTERN)): if not self.text.match(doc[ann]): return False return True
Methods
def __call__(self, ann, doc=None)
-
Check if the annotation matches.
Args
ann
- the annotation to check
doc
- the document the annotation refers to, only needed if the matcher contains a "text" constraint.
Returns
True if the annotation matches, False otherwise.
Expand source code
def __call__(self, ann, doc=None): """ Check if the annotation matches. Args: ann: the annotation to check doc: the document the annotation refers to, only needed if the matcher contains a "text" constraint. Returns: True if the annotation matches, False otherwise. """ if self.type is not None: if isinstance(self.type, str): if self.type != ann.type: return False elif callable(self.type): if not self.type(ann.type): return False elif isinstance(self.type, (CLASS_RE_PATTERN, CLASS_REGEX_PATTERN)): if not self.type.match(ann.type): return False else: tmp = str(self.type) if tmp != self.type: return False if self.features_matcher is not None: if not self.features_matcher(ann.features): return False if self.text is not None: if isinstance(self.text, (CLASS_RE_PATTERN, CLASS_REGEX_PATTERN)): if not self.text.match(doc[ann]): return False return True
class FeatureEqMatcher (**kwargs)
-
Callable that matches the given dictionary against features and returns True only if all features match and there are no additional features.
This works like FeatureMatcher, but all the features that get checked must satisfy the constraints and there must be no additional features.
Create a FeatureEqMatcher instance.
Args
**kwargs
- arbitrary key/value pairs to use for matching features.
Expand source code
class FeatureEqMatcher: """ Callable that matches the given dictionary against features and returns True only if all features match and there are no additional features. This works like FeatureMatcher, but all the features that get checked must satisfy the constraints and there must be no additional features. """ def __init__(self, **kwargs): """ Create a FeatureEqMatcher instance. Args: **kwargs: arbitrary key/value pairs to use for matching features. """ self.featurematches = kwargs self._fm = FeatureMatcher(**kwargs) def __call__(self, features): """ Check if the passed features match the constraints for this FeatureMatcher. This returns true if all the constraints defined for this FeatureMatcher are satisfied, ONLY if the features do not contain additional features not included in the constraints. Args: features: the features to check Returns: True if the feature constraints are satisfied """ for feat in features.keys(): if feat not in self.featurematches: return False if not self._fm(features): return False return True
Methods
def __call__(self, features)
-
Check if the passed features match the constraints for this FeatureMatcher.
This returns true if all the constraints defined for this FeatureMatcher are satisfied, ONLY if the features do not contain additional features not included in the constraints.
Args
features
- the features to check
Returns
True if the feature constraints are satisfied
Expand source code
def __call__(self, features): """ Check if the passed features match the constraints for this FeatureMatcher. This returns true if all the constraints defined for this FeatureMatcher are satisfied, ONLY if the features do not contain additional features not included in the constraints. Args: features: the features to check Returns: True if the feature constraints are satisfied """ for feat in features.keys(): if feat not in self.featurematches: return False if not self._fm(features): return False return True
class FeatureMatcher (**kwargs)
-
Callable that matches the given dictionary against features.
This creates a callable that can be used to easily check if features match the features and feature constraint defined by the matcher. When a matcher is created, the argument names are used as feature names and the argument values are either literal values to compare with or compiled regular expressions or callables.
A FeatureMatcher matches as soon as all specified features match, no matter if the features we compare with contain additional features.
In this example, the feature matcher will check if there are two features with the given names and values.
Example
fmatcher1 = FeatureMatcher(feature1 = "somevalue", feature2 = 999) if fmatcher(ann.features): print("Yay, both features are in ann.features!")
In this example
feature1
matches if it matches the regular expression, and feature2 matches if the given callable returns true.Example
def checksize(x): return 12 <= x < 33 NAMEPATTERN = re.compile(r"[A-Z][a-z_0-9]+") fmatcher2 = FeatureMatcher(name = NAMEPATTERN, size = checksize)
Create a FeatureMatcher instance.
Args
**kwargs
- arbitrary key/value pairs to use for matching features.
Expand source code
class FeatureMatcher: """ Callable that matches the given dictionary against features. This creates a callable that can be used to easily check if features match the features and feature constraint defined by the matcher. When a matcher is created, the argument names are used as feature names and the argument values are either literal values to compare with or compiled regular expressions or callables. A FeatureMatcher matches as soon as all specified features match, no matter if the features we compare with contain additional features. In this example, the feature matcher will check if there are two features with the given names and values. Example: ```python fmatcher1 = FeatureMatcher(feature1 = "somevalue", feature2 = 999) if fmatcher(ann.features): print("Yay, both features are in ann.features!") ``` In this example `feature1` matches if it matches the regular expression, and feature2 matches if the given callable returns true. Example: ```python def checksize(x): return 12 <= x < 33 NAMEPATTERN = re.compile(r"[A-Z][a-z_0-9]+") fmatcher2 = FeatureMatcher(name = NAMEPATTERN, size = checksize) ``` """ def __init__(self, **kwargs): """ Create a FeatureMatcher instance. Args: **kwargs: arbitrary key/value pairs to use for matching features. """ self.featurematches = kwargs # "featurematcher" def __call__(self, features): """ Check if the passed features match the constraints for this FeatureMatcher. This returns true if all the constraints defined for this FeatureMatcher are satisfied, even if the features contain additional features not included in the constraints. Args: features: the features to check Returns: True if the feature constraints are satisfied """ for fmn in self.featurematches: # "featurematchername" if fmn not in features: return False for fmn, fmv in self.featurematches.items(): # "featurematchername"/"featurematchervalue" feature = features[fmn] if callable(fmv): if not fmv(feature): return False elif isinstance(fmv, (CLASS_RE_PATTERN, CLASS_REGEX_PATTERN)): fstr = str(feature) if not fmv.match(fstr): return False else: fstr = str(feature) tmp = str(fmv) if tmp != fstr: return False return True
Methods
def __call__(self, features)
-
Check if the passed features match the constraints for this FeatureMatcher.
This returns true if all the constraints defined for this FeatureMatcher are satisfied, even if the features contain additional features not included in the constraints.
Args
features
- the features to check
Returns
True if the feature constraints are satisfied
Expand source code
def __call__(self, features): """ Check if the passed features match the constraints for this FeatureMatcher. This returns true if all the constraints defined for this FeatureMatcher are satisfied, even if the features contain additional features not included in the constraints. Args: features: the features to check Returns: True if the feature constraints are satisfied """ for fmn in self.featurematches: # "featurematchername" if fmn not in features: return False for fmn, fmv in self.featurematches.items(): # "featurematchername"/"featurematchervalue" feature = features[fmn] if callable(fmv): if not fmv(feature): return False elif isinstance(fmv, (CLASS_RE_PATTERN, CLASS_REGEX_PATTERN)): fstr = str(feature) if not fmv.match(fstr): return False else: fstr = str(feature) tmp = str(fmv) if tmp != fstr: return False return True
class IfNot (other)
-
A matcher that returns the negation of another matcher.
Example
m1 = FeatureMatcher(f1="x", f2=22) m2 = IfNot(m1) # m2 matches for features which do not contain f1="x" and not f2=22
Expand source code
class IfNot: """ A matcher that returns the negation of another matcher. Example: ```python m1 = FeatureMatcher(f1="x", f2=22) m2 = IfNot(m1) # m2 matches for features which do not contain f1="x" and not f2=22 ``` """ def __init__(self, other): self.other = other def __call__(self, *args, **kwargs): return not self.other(*args, **kwargs)
class Nocase (text)
-
A matcher for comparing text in a case insensitive way.
This carries out the matching by using the upper-case versions of the text compared and the text to compare with. This makes sure that cases like German "ß" which expands to "SS" are handled correctly (while uppercase "SS" often should NOT get converted to lowercase "ß").
Example
m1 = Nocase("sometext") assert m1("SomeText") assert m1("SOMETEXT")
Create a case insensitive text matcher.
Args
text
- the text to match against.
Expand source code
class Nocase: """ A matcher for comparing text in a case insensitive way. This carries out the matching by using the upper-case versions of the text compared and the text to compare with. This makes sure that cases like German "ß" which expands to "SS" are handled correctly (while uppercase "SS" often should NOT get converted to lowercase "ß"). Example: ```python m1 = Nocase("sometext") assert m1("SomeText") assert m1("SOMETEXT") ``` """ def __init__(self, text): """ Create a case insensitive text matcher. Args: text: the text to match against. """ self.text = text.upper() def __call__(self, text): """ Check if the text matches. Args: text: the text to check Returns: True if the text matches """ return text.upper() == self.text
class CLASS_REGEX_PATTERN
-
Compiled regex object
Instance variables
var flags
-
The regex matching flags.
var groupindex
-
A dictionary mapping group names to group numbers.
var groups
-
The number of capturing groups in the pattern.
var named_lists
-
The named lists used by the regex.
var pattern
-
The pattern string from which the regex object was compiled.
Methods
def findall(string, pos=None, endpos=None, overlapped=False, concurrent=None, timeout=None)
-
findall(string, pos=None, endpos=None, overlapped=False, concurrent=None, timeout=None) –> list. Return a list of all matches of pattern in string. The matches may be overlapped if overlapped is True.
def finditer(string, pos=None, endpos=None, overlapped=False, concurrent=None, timeout=None)
-
finditer(string, pos=None, endpos=None, overlapped=False, concurrent=None, timeout=None) –> iterator. Return an iterator over all matches for the RE pattern in string. The matches may be overlapped if overlapped is True. For each match, the iterator returns a MatchObject.
def fullmatch(string, pos=None, endpos=None, concurrent=None, timeout=None)
-
fullmatch(string, pos=None, endpos=None, concurrent=None, timeout=None) –> MatchObject or None. Match zero or more characters against all of the string.
def match(string, pos=None, endpos=None, concurrent=None, timeout=None)
-
match(string, pos=None, endpos=None, concurrent=None, timeout=None) –> MatchObject or None. Match zero or more characters at the beginning of the string.
def scanner(string, pos=None, endpos=None, overlapped=False, concurrent=None, timeout=None)
-
scanner(string, pos=None, endpos=None, overlapped=False, concurrent=None, timeout=None) –> scanner. Return an scanner for the RE pattern in string. The matches may be overlapped if overlapped is True.
def search(string, pos=None, endpos=None, concurrent=None, timeout=None)
-
search(string, pos=None, endpos=None, concurrent=None, timeout=None) –> MatchObject or None. Search through string looking for a match, and return a corresponding match object instance. Return None if no match is found.
def split(string, maxsplit=0, concurrent=None, timeout=None)
-
split(string, maxsplit=0, concurrent=None, timeout=None) –> list. Split string by the occurrences of pattern.
def splititer(string, maxsplit=0, concurrent=None, timeout=None)
-
splititer(string, maxsplit=0, concurrent=None, timeout=None) –> iterator. Return an iterator yielding the parts of a split string.
def sub(repl, string, count=0, flags=0, pos=None, endpos=None, concurrent=None, timeout=None)
-
sub(repl, string, count=0, flags=0, pos=None, endpos=None, concurrent=None, timeout=None) –> newstring Return the string obtained by replacing the leftmost (or rightmost with a reverse pattern) non-overlapping occurrences of pattern in string by the replacement repl.
def subf(format, string, count=0, flags=0, pos=None, endpos=None, concurrent=None, timeout=None)
-
subf(format, string, count=0, flags=0, pos=None, endpos=None, concurrent=None, timeout=None) –> newstring Return the string obtained by replacing the leftmost (or rightmost with a reverse pattern) non-overlapping occurrences of pattern in string by the replacement format.
def subfn(...)
-
subfn(format, string, count=0, flags=0, pos=None, endpos=None, concurrent=None, timeout=None) –> (newstring, number of subs) Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost (or rightmost with a reverse pattern) non-overlapping occurrences of pattern with the replacement format.
def subn(...)
-
subn(repl, string, count=0, flags=0, pos=None, endpos=None, concurrent=None, timeout=None) –> (newstring, number of subs) Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost (or rightmost with a reverse pattern) non-overlapping occurrences of pattern with the replacement repl.
class isIn (*args, matchcase=True)
-
Helper for use with the Feature matcher to check if a feature value is one of the values given to the constructor.
Literal values to compare against. The created callable returns true if called with one of these values.
Args
*args
- values to match against.
matchcase
- if string values should get matched with exact case or not if False, uses the upper case variant for matching
Expand source code
class isIn: # pylint: disable=C0103 """ Helper for use with the Feature matcher to check if a feature value is one of the values given to the constructor. """ def __init__(self, *args, matchcase=True): """ Literal values to compare against. The created callable returns true if called with one of these values. Args: *args: values to match against. matchcase: if string values should get matched with exact case or not if False, uses the upper case variant for matching """ self.matchcase = matchcase if not matchcase: self.vals = [x.upper() for x in args if isinstance(x, str)] else: self.vals = args def __call__(self, value): if not self.matchcase: value = value.upper() return value in self.vals