{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Rewire Annotator\n",
    "\n",
    "The `RewireAnnotator` is an annotator that uses the [Rewire](https://rewire.online/rewire-api-access/) service to annotate documents. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from gatenlp import Document\n",
    "from gatenlp.processing.client.rewire import RewireAnnotator\n",
    "from gatenlp.lib_spacy import AnnSpacy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# The API key is read from the environment so that it never appears in the notebook.\n",
    "# A missing REWIRE_KEY variable raises KeyError right here.\n",
    "apikey = os.environ[\"REWIRE_KEY\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Example documents: the first text is neutral, the others contain profanity,\n",
    "# insults or a threat.\n",
    "# The continuation lines of the triple-quoted text are deliberately not re-indented:\n",
    "# their leading whitespace is part of the document text.\n",
    "docs = [\n",
    "    Document(text)\n",
    "    for text in (\n",
    "        \"Barack Obama visited Microsoft in New York last May.\",\n",
    "        \"\"\"This is just some example text. \n",
    " Has a sentence that talks about shit in general. \n",
    " And another talking about 💩💩💩💩 in general. This guy is a moron.\"\"\",\n",
    "        \"What a stupid bitch she is.\",\n",
    "        \"I am going to kill you, asshole!\",\n",
    "    )\n",
    "]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/johann/software/anaconda/envs/gatenlp-37/lib/python3.7/site-packages/spacy/util.py:837: UserWarning: [W095] Model 'en_core_web_sm' (3.2.0) was trained with spaCy v3.2 and may not be 100% compatible with the current version (3.3.1). If you see errors or degraded performance, download a newer compatible model or retrain your custom model with the current spaCy version. 
For more details and available updates, run: python -m spacy validate\n",
      " warnings.warn(warn_msg)\n"
     ]
    }
   ],
   "source": [
    "# Only sentence annotations are requested from spaCy; they are used further down\n",
    "# to annotate the texts on a per-sentence level.\n",
    "anntr = AnnSpacy(\n",
    "    add_tokens=False,\n",
    "    add_entities=False,\n",
    "    add_sentences=True,\n",
    "    add_nounchunks=False,\n",
    "    add_deps=False,\n",
    ")\n",
    "# NOTE(review): rebinding `doc` does not replace the entries of `docs`; this presumably\n",
    "# relies on the annotator updating each document in place - confirm.\n",
    "for doc in docs:\n",
    "    doc = anntr(doc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Document 0 : Features({'abuse': 0.007695647422224283, 'hate': 0.01182339433580637, 'profanity': 0.0, 'violent': 0.00013819332525599748, 'sexually_explicit': 0.0007839425234124064, 'positive': 0.03010033629834652})\n",
      "Document 1 : Features({'abuse': 0.9521309733390808, 'hate': 0.07565305382013321, 'profanity': 1.0, 'violent': 0.00013135180051904172, 'sexually_explicit': 0.0008426422718912363, 'positive': 0.03213106095790863})\n",
      "Document 2 : Features({'abuse': 0.9487276077270508, 'hate': 0.07640768587589264, 'profanity': 1.0, 'violent': 0.00017197855049744248, 'sexually_explicit': 0.0007524627144448459, 'positive': 0.051224932074546814})\n",
      "Document 3 : Features({'abuse': 0.9973989725112915, 'hate': 0.03146960213780403, 'profanity': 1.0, 'violent': 0.9915707111358643, 'sexually_explicit': 0.003027835162356496, 'positive': 0.05033063143491745})\n"
     ]
    }
   ],
   "source": [
    "# Two annotators using the same API key:\n",
    "# one for annotating documents as a whole ...\n",
    "rewire_doc = RewireAnnotator(auth_token=apikey)\n",
    "# ... and one for annotating the \"Sentence\" annotations.\n",
    "rewire_sent = RewireAnnotator(auth_token=apikey, ann_type=\"Sentence\")\n",
    "\n",
    "# Per document: clear the document features, annotate the document as a whole,\n",
    "# print the document features that were assigned, then annotate the sentences.\n",
    "for idx, doc in enumerate(docs):\n",
    "    doc.features.clear()\n",
    "    rewire_doc(doc)\n",
    "    print(\"Document\", idx, \":\", doc.features)\n",
    "    rewire_sent(doc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "