Module gatenlp.processing.client.rewire
Rewire client.
Expand source code
"""
Rewire client.
"""
import time
import requests
from typing import Optional, Dict
from gatenlp.processing.annotator import Annotator
from gatenlp.utils import init_logger
class RewireAnnotator(Annotator):
    """
    An annotator that sends text to the Rewire classification service
    (see https://rewire.online/rewire-api-access/)
    and uses the result to set either document or annotation features.
    """

    def __init__(
        self,
        url: Optional[str] = None,
        auth_token: Optional[str] = None,
        annset_name: Optional[str] = "",
        ann_type: Optional[str] = None,
        ann_feature: Optional[str] = None,
        attr2feature: Optional[Dict[str, str]] = None,
        min_delay_ms=0,
    ):
        """
        Create a Rewire annotator. The annotator stores the scores returned from the
        Rewire service as features, either in the annotation that gets processed or the document
        features if the whole document is getting processed.

        Args:
            url: the annotation service endpoint, if None, the default endpoint URL is used
            auth_token: the authentication token needed to use the service, this is required!
            annset_name: if an annotation type is specified, the name of the annotation set to use
            ann_type: if this is specified, the text of those annotations is getting classified and
                the result is stored as annotation features. Otherwise, the whole document is
                classified and the result is stored as document features.
            ann_feature: if ann_type is specified, and this is also specified, the text is taken from
                that feature instead of the underlying document text.
            attr2feature: a dictionary mapping the attributes (score names) returned from the
                service to feature names. Currently attributes "hate" and "abuse" are returned.
            min_delay_ms: minimum time in ms to wait between requests to the server

        Raises:
            AssertionError: if no (or an empty) auth_token is given.
        """
        if url is None:
            url = "https://api.rewire.online/classify"
        assert auth_token, "auth_token is required to use the Rewire service"
        self.auth_token = auth_token
        self.url = url
        self.min_delay_s = min_delay_ms / 1000.0
        self.logger = init_logger()
        # Wall-clock time at which the most recent request finished; 0 means
        # "no request made yet", so the first call is never delayed.
        self._last_call_time = 0
        self.ann_type = ann_type
        self.ann_feature = ann_feature
        self.attr2feature = attr2feature
        self.annset_name = annset_name

    def _call_api(self, text, attr2feature=None):
        """
        Send text to the API, respecting the minimum delay, and return the scores.

        Args:
            text: the text to classify
            attr2feature: optional mapping from score names returned by the service
                to the names to use in the returned dict; unmapped names are kept.

        Returns:
            The "scores" entry of the JSON response, with keys renamed according
            to attr2feature if given.

        Raises:
            Exception: if the response contains a "message" but no "scores".
        """
        elapsed = time.time() - self._last_call_time
        if elapsed < self.min_delay_s:
            time.sleep(self.min_delay_s - elapsed)
        try:
            response = requests.post(
                self.url,
                json=dict(text=text),
                headers={"x-api-key": self.auth_token},
            )
        finally:
            # Remember when this request finished (even if it failed) so the
            # next call can actually enforce min_delay_s. Without this update
            # the configured delay would never take effect.
            self._last_call_time = time.time()
        ret = response.json()
        if "message" in ret and "scores" not in ret:
            raise Exception(f"API call problem, message is: {ret['message']}")
        scores = ret["scores"]
        if attr2feature:
            scores = {attr2feature.get(k, k): v for k, v in scores.items()}
        return scores

    def __call__(self, doc, **kwargs):
        """
        Classify the document, or the annotations of the configured type, and
        store the returned scores as features.

        Args:
            doc: the document to process
            **kwargs: accepted but not used by this annotator

        Returns:
            The same document, with features updated on the document or on the
            processed annotations.
        """
        if self.ann_type is not None:
            annset = doc.annset(self.annset_name).with_type(self.ann_type)
            for ann in annset:
                if self.ann_feature:
                    # Classify the value of the configured feature; a missing
                    # feature is sent as an empty string.
                    txt = ann.features.get(self.ann_feature, "")
                else:
                    txt = doc[ann]
                ret = self._call_api(txt, attr2feature=self.attr2feature)
                if isinstance(ret, dict):
                    ann.features.update(ret)
        else:
            ret = self._call_api(doc.text, attr2feature=self.attr2feature)
            if isinstance(ret, dict):
                doc.features.update(ret)
        return doc
Classes
class RewireAnnotator (url: Optional[str] = None, auth_token: Optional[str] = None, annset_name: Optional[str] = '', ann_type: Optional[str] = None, ann_feature: Optional[str] = None, attr2feature: Optional[Dict[str, str]] = None, min_delay_ms=0)
-
An annotator that sends text to the Rewire classification service (see https://rewire.online/rewire-api-access/) and uses the result to set either document or annotation features.
Create a Rewire annotator. The annotator stores the scores returned from the Rewire service as features, either in the annotation that gets processed or the document features if the whole document is getting processed.
Args
url
- the annotation service endpoint; if None, the default endpoint URL is used
auth_token
- the authentication token needed to use the service, this is required!
annset_name
- if an annotation type is specified, the name of the annotation set to use
ann_type
- if this is specified, the text of those annotations is classified and the result is stored as annotation features. Otherwise, the whole document is classified and the result is stored as document features.
ann_feature
- if ann_type is specified, and this is also specified, the text is taken from that feature instead of the underlying document text.
attr2feature
- a dictionary mapping the attributes (score names) returned from the service to feature names. Currently attributes "hate" and "abuse" are returned.
min_delay_ms
- minimum time in ms to wait between requests to the server
Expand source code
class RewireAnnotator(Annotator):
    """
    An annotator that sends text to the Rewire classification service
    (see https://rewire.online/rewire-api-access/)
    and uses the result to set either document or annotation features.
    """

    def __init__(
        self,
        url: Optional[str] = None,
        auth_token: Optional[str] = None,
        annset_name: Optional[str] = "",
        ann_type: Optional[str] = None,
        ann_feature: Optional[str] = None,
        attr2feature: Optional[Dict[str, str]] = None,
        min_delay_ms=0,
    ):
        """
        Create a Rewire annotator. The annotator stores the scores returned from the
        Rewire service as features, either in the annotation that gets processed or the document
        features if the whole document is getting processed.

        Args:
            url: the annotation service endpoint, if None, the default endpoint URL is used
            auth_token: the authentication token needed to use the service, this is required!
            annset_name: if an annotation type is specified, the name of the annotation set to use
            ann_type: if this is specified, the text of those annotations is getting classified and
                the result is stored as annotation features. Otherwise, the whole document is
                classified and the result is stored as document features.
            ann_feature: if ann_type is specified, and this is also specified, the text is taken from
                that feature instead of the underlying document text.
            attr2feature: a dictionary mapping the attributes (score names) returned from the
                service to feature names. Currently attributes "hate" and "abuse" are returned.
            min_delay_ms: minimum time in ms to wait between requests to the server

        Raises:
            AssertionError: if no (or an empty) auth_token is given.
        """
        if url is None:
            url = "https://api.rewire.online/classify"
        assert auth_token, "auth_token is required to use the Rewire service"
        self.auth_token = auth_token
        self.url = url
        self.min_delay_s = min_delay_ms / 1000.0
        self.logger = init_logger()
        # Wall-clock time at which the most recent request finished; 0 means
        # "no request made yet", so the first call is never delayed.
        self._last_call_time = 0
        self.ann_type = ann_type
        self.ann_feature = ann_feature
        self.attr2feature = attr2feature
        self.annset_name = annset_name

    def _call_api(self, text, attr2feature=None):
        """
        Send text to the API, respecting the minimum delay, and return the scores.

        Args:
            text: the text to classify
            attr2feature: optional mapping from score names returned by the service
                to the names to use in the returned dict; unmapped names are kept.

        Returns:
            The "scores" entry of the JSON response, with keys renamed according
            to attr2feature if given.

        Raises:
            Exception: if the response contains a "message" but no "scores".
        """
        elapsed = time.time() - self._last_call_time
        if elapsed < self.min_delay_s:
            time.sleep(self.min_delay_s - elapsed)
        try:
            response = requests.post(
                self.url,
                json=dict(text=text),
                headers={"x-api-key": self.auth_token},
            )
        finally:
            # Remember when this request finished (even if it failed) so the
            # next call can actually enforce min_delay_s. Without this update
            # the configured delay would never take effect.
            self._last_call_time = time.time()
        ret = response.json()
        if "message" in ret and "scores" not in ret:
            raise Exception(f"API call problem, message is: {ret['message']}")
        scores = ret["scores"]
        if attr2feature:
            scores = {attr2feature.get(k, k): v for k, v in scores.items()}
        return scores

    def __call__(self, doc, **kwargs):
        """
        Classify the document, or the annotations of the configured type, and
        store the returned scores as features.

        Args:
            doc: the document to process
            **kwargs: accepted but not used by this annotator

        Returns:
            The same document, with features updated on the document or on the
            processed annotations.
        """
        if self.ann_type is not None:
            annset = doc.annset(self.annset_name).with_type(self.ann_type)
            for ann in annset:
                if self.ann_feature:
                    # Classify the value of the configured feature; a missing
                    # feature is sent as an empty string.
                    txt = ann.features.get(self.ann_feature, "")
                else:
                    txt = doc[ann]
                ret = self._call_api(txt, attr2feature=self.attr2feature)
                if isinstance(ret, dict):
                    ann.features.update(ret)
        else:
            ret = self._call_api(doc.text, attr2feature=self.attr2feature)
            if isinstance(ret, dict):
                doc.features.update(ret)
        return doc
Ancestors
- Annotator
- abc.ABC
Inherited members