Perspective Annotator
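The PerspectiveAnnotator is an annotator that uses the Perspective service to annotate documents, either as a whole or per annotation (e.g. per sentence), with scores for requested attributes such as toxicity, insult, and profanity.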
import os
from gatenlp import Document
from gatenlp.processing.client.perspective import PerspectiveAnnotator
from gatenlp.lib_spacy import AnnSpacy
# The Perspective API key is taken from the environment; a KeyError is raised
# if PERSPECTIVE_KEY is not set.
apikey = os.environ["PERSPECTIVE_KEY"]

# Example texts: one neutral sentence followed by texts containing profanity,
# insults and a threat.
docs = [
    Document("Barack Obama visited Microsoft in New York last May."),
    Document("""This is just some example text.
Has a sentence that talks about shit in general.
And another talking about 💩💩💩💩 in general. This guy is a moron."""),
    Document("What a stupid bitch she is."),
    Document("I am going to kill you, asshole!"),
]

# Only sentence annotations are requested from spaCy: they are the units that
# get annotated on a per-sentence level further below.
anntr = AnnSpacy(
    add_tokens=False,
    add_entities=False,
    add_sentences=True,
    add_nounchunks=False,
    add_deps=False,
)
for doc in docs:
    # The return value is not kept: the entries of `docs` themselves are used
    # afterwards (presumably the annotator updates each document in place).
    anntr(doc)

# Attributes requested from the service by both annotators.
attributes = ("TOXICITY", "INSULT", "PROFANITY")

# Annotator for annotating each document as a whole.
perspective_doc = PerspectiveAnnotator(
    auth_token=apikey,
    requested_attributes=list(attributes),
)

# Annotator for annotating the individual "Sentence" annotations.
perspective_sent = PerspectiveAnnotator(
    auth_token=apikey,
    ann_type="Sentence",
    requested_attributes=list(attributes),
)

# Run both annotators over every document and print the document features
# after the document-level annotator has run.
for idx, doc in enumerate(docs):
    # Start from empty document features so the printout shows only what the
    # document-level annotator assigned.
    doc.features.clear()
    perspective_doc(doc)
    print("Document", idx, ":", doc.features)
    perspective_sent(doc)
2022-11-09 22:02:56,809|INFO|googleapiclient.discovery_cache|file_cache is only supported with oauth2client<4.0.0
2022-11-09 22:02:56,943|INFO|googleapiclient.discovery_cache|file_cache is only supported with oauth2client<4.0.0
Document 0 : Features({'PROFANITY_PROBABILITY': 0.014634945, 'INSULT_PROBABILITY': 0.009051885, 'TOXICITY_PROBABILITY': 0.017843807, 'languages': ['en']})
Document 1 : Features({'PROFANITY_PROBABILITY': 0.7246992, 'TOXICITY_PROBABILITY': 0.8540474, 'INSULT_PROBABILITY': 0.802824, 'languages': ['en']})
Document 2 : Features({'TOXICITY_PROBABILITY': 0.96426713, 'INSULT_PROBABILITY': 0.9343686, 'PROFANITY_PROBABILITY': 0.89909166, 'languages': ['en']})
Document 3 : Features({'PROFANITY_PROBABILITY': 0.87472564, 'TOXICITY_PROBABILITY': 0.9817629, 'INSULT_PROBABILITY': 0.8390655, 'languages': ['en']})
# Each bare expression below is meant to be evaluated as the last expression of
# its own notebook cell so that the document is displayed; run as plain script
# statements they have no visible effect.
docs[0]
docs[1]
docs[2]
docs[3]
Notebook last updated
# Report the gatenlp version that was installed when this notebook was last updated.
import gatenlp
print("NB last updated with gatenlp version", gatenlp.__version__)
NB last updated with gatenlp version 1.0.8a1