{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Rewire Annotator\n", "\n", "The `RewireAnnotator` is an annotator that uses the [Rewire](https://rewire.online/rewire-api-access/) service to annotate documents with scores for `abuse`, `hate`, `profanity`, `violent`, `sexually_explicit` and `positive`.\n", "\n", "The scores are stored either as document features (when the whole document is annotated) or as features of existing annotations of a given type, e.g. `Sentence` annotations.\n", "\n", "Running this notebook requires a Rewire API key in the environment variable `REWIRE_KEY`." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "from gatenlp import Document\n", "from gatenlp.processing.client.rewire import RewireAnnotator\n", "from gatenlp.lib_spacy import AnnSpacy" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "scrolled": false }, "outputs": [], "source": [ "# Read the Rewire API key from the environment so that it never gets stored in the notebook\n", "try:\n", "    apikey = os.environ[\"REWIRE_KEY\"]\n", "except KeyError:\n", "    # same exception type as the plain lookup, but with a message that says what to do\n", "    raise KeyError(\"REWIRE_KEY: set this environment variable to your Rewire API key before running this notebook\") from None" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Example documents: a neutral text plus texts containing profanity, insults and a threat\n", "docs = [\n", " Document(\"Barack Obama visited Microsoft in New York last May.\"),\n", " Document(\"\"\"This is just some example text. \n", " Has a sentence that talks about shit in general. \n", " And another talking about 💩💩💩💩 in general. This guy is a moron.\"\"\"),\n", " Document(\"What a stupid bitch she is.\"),\n", " Document(\"I am going to kill you, asshole!\"),\n", "]\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "scrolled": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/johann/software/anaconda/envs/gatenlp-37/lib/python3.7/site-packages/spacy/util.py:837: UserWarning: [W095] Model 'en_core_web_sm' (3.2.0) was trained with spaCy v3.2 and may not be 100% compatible with the current version (3.3.1). If you see errors or degraded performance, download a newer compatible model or retrain your custom model with the current spaCy version. 
For more details and available updates, run: python -m spacy validate\n", " warnings.warn(warn_msg)\n" ] } ], "source": [ "# Use spaCy only to create Sentence annotations: they are the spans that get annotated on a per-sentence level later\n", "anntr = AnnSpacy(add_tokens=False, add_entities=False, add_sentences=True, add_nounchunks=False, add_deps=False)\n", "for doc in docs:\n", "    # the annotations get added to the document in place, so the return value is not needed\n", "    anntr(doc)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Document 0 : Features({'abuse': 0.007695647422224283, 'hate': 0.01182339433580637, 'profanity': 0.0, 'violent': 0.00013819332525599748, 'sexually_explicit': 0.0007839425234124064, 'positive': 0.03010033629834652})\n", "Document 1 : Features({'abuse': 0.9521309733390808, 'hate': 0.07565305382013321, 'profanity': 1.0, 'violent': 0.00013135180051904172, 'sexually_explicit': 0.0008426422718912363, 'positive': 0.03213106095790863})\n", "Document 2 : Features({'abuse': 0.9487276077270508, 'hate': 0.07640768587589264, 'profanity': 1.0, 'violent': 0.00017197855049744248, 'sexually_explicit': 0.0007524627144448459, 'positive': 0.051224932074546814})\n", "Document 3 : Features({'abuse': 0.9973989725112915, 'hate': 0.03146960213780403, 'profanity': 1.0, 'violent': 0.9915707111358643, 'sexually_explicit': 0.003027835162356496, 'positive': 0.05033063143491745})\n" ] } ], "source": [ "# Annotator for annotating documents as a whole: the scores end up in the document features\n", "rewire_doc = RewireAnnotator(auth_token=apikey)\n", "# Annotator for annotating sentences: the scores end up in the features of each Sentence annotation\n", "rewire_sent = RewireAnnotator(auth_token=apikey, ann_type=\"Sentence\")\n", "\n", "# run both annotators over all documents, show the document features assigned for each\n", "for idx, doc in enumerate(docs):\n", "    # start from empty document features so that only what this run assigns gets printed\n", "    doc.features.clear()\n", "    rewire_doc(doc)\n", "    print(\"Document\", idx, \":\", doc.features)\n", "    rewire_sent(doc)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", "
\n", "\n", "
" ], "text/plain": [ "Document(Barack Obama visited Microsoft in New York last May.,features=Features({'abuse': 0.007695647422224283, 'hate': 0.01182339433580637, 'profanity': 0.0, 'violent': 0.00013819332525599748, 'sexually_explicit': 0.0007839425234124064, 'positive': 0.03010033629834652}),anns={'': AnnotationSet([Annotation(0,6,Token,features=Features({'_i': 0, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': False, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': True, 'is_space': False, 'is_stop': False, 'is_title': True, 'is_upper': False, 'lang': 'en', 'lemma': 'Barack', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 15388493565120789335, 'pos': 'PROPN', 'prefix': 'B', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'NNP', 'shape': 'Xxxxx', 'suffix': 'ack'}),id=0), Annotation(0,52,Sentence,features=Features({'abuse': 0.007695647422224283, 'hate': 0.01182339433580637, 'profanity': 0.0, 'violent': 0.00013819332525599748, 'sexually_explicit': 0.0007839425234124064, 'positive': 0.03010033629834652}),id=17), Annotation(6,7,SpaceToken,features=Features({'is_space': True}),id=1), Annotation(7,12,Token,features=Features({'_i': 1, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': False, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': True, 'is_upper': False, 'lang': 'en', 'lemma': 'Obama', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 4857242187112322394, 'pos': 'PROPN', 'prefix': 'O', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'NNP', 'shape': 'Xxxxx', 'suffix': 'ama'}),id=2), Annotation(12,13,SpaceToken,features=Features({'is_space': True}),id=3), Annotation(13,20,Token,features=Features({'_i': 2, 
'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'visit', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 11749534953085588517, 'pos': 'VERB', 'prefix': 'v', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'VBD', 'shape': 'xxxx', 'suffix': 'ted'}),id=4), Annotation(20,21,SpaceToken,features=Features({'is_space': True}),id=5), Annotation(21,30,Token,features=Features({'_i': 3, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': False, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': True, 'is_upper': False, 'lang': 'en', 'lemma': 'Microsoft', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 5291444330391917710, 'pos': 'PROPN', 'prefix': 'M', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'NNP', 'shape': 'Xxxxx', 'suffix': 'oft'}),id=6), Annotation(30,31,SpaceToken,features=Features({'is_space': True}),id=7), Annotation(31,33,Token,features=Features({'_i': 4, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': True, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'in', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 3002984154512732771, 'pos': 'ADP', 'prefix': 'i', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'IN', 'shape': 'xx', 'suffix': 'in'}),id=8), 
Annotation(33,34,SpaceToken,features=Features({'is_space': True}),id=9), Annotation(34,37,Token,features=Features({'_i': 5, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': False, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': True, 'is_upper': False, 'lang': 'en', 'lemma': 'New', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 7503827727184870577, 'pos': 'PROPN', 'prefix': 'N', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'NNP', 'shape': 'Xxx', 'suffix': 'New'}),id=10), Annotation(37,38,SpaceToken,features=Features({'is_space': True}),id=11), Annotation(38,42,Token,features=Features({'_i': 6, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': False, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': True, 'is_upper': False, 'lang': 'en', 'lemma': 'York', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 7898044819112200372, 'pos': 'PROPN', 'prefix': 'Y', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'NNP', 'shape': 'Xxxx', 'suffix': 'ork'}),id=12), Annotation(42,43,SpaceToken,features=Features({'is_space': True}),id=13), Annotation(43,47,Token,features=Features({'_i': 7, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': True, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'last', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 10321518907502812892, 'pos': 'ADJ', 'prefix': 'l', 'prob': -20.0, 'rank': 
18446744073709551615, 'sentiment': 0.0, 'tag': 'JJ', 'shape': 'xxxx', 'suffix': 'ast'}),id=14), Annotation(47,48,SpaceToken,features=Features({'is_space': True}),id=15), Annotation(48,52,Token,features=Features({'_i': 8, 'is_alpha': False, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': False, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': True, 'is_upper': False, 'lang': 'en', 'lemma': 'May.', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 3587261033131107511, 'pos': 'PROPN', 'prefix': 'M', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'NNP', 'shape': 'Xxx.', 'suffix': 'ay.'}),id=16)])})" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "docs[0]" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", "
\n", "\n", "
" ], "text/plain": [ "Document(This is just some example text. \n", " Has a sentence that talks about shit in general. \n", " And another talking about 💩💩💩💩 in general. This guy is a moron.,features=Features({'abuse': 0.9521309733390808, 'hate': 0.07565305382013321, 'profanity': 1.0, 'violent': 0.00013135180051904172, 'sexually_explicit': 0.0008426422718912363, 'positive': 0.03213106095790863}),anns={'': AnnotationSet([Annotation(0,4,Token,features=Features({'_i': 0, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': False, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': True, 'is_space': False, 'is_stop': True, 'is_title': True, 'is_upper': False, 'lang': 'en', 'lemma': 'this', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 12943039165150086467, 'pos': 'PRON', 'prefix': 'T', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'DT', 'shape': 'Xxxx', 'suffix': 'his'}),id=0), Annotation(0,31,Sentence,features=Features({'abuse': 0.004905405919998884, 'hate': 0.015247589908540249, 'profanity': 0.0, 'violent': 0.0001541633391752839, 'sexually_explicit': 0.0006706176209263504, 'positive': 0.03656250610947609}),id=62), Annotation(4,5,SpaceToken,features=Features({'is_space': True}),id=1), Annotation(5,7,Token,features=Features({'_i': 1, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': True, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'be', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 3411606890003347522, 'pos': 'AUX', 'prefix': 'i', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'VBZ', 'shape': 'xx', 'suffix': 'is'}),id=2), 
Annotation(7,8,SpaceToken,features=Features({'is_space': True}),id=3), Annotation(8,12,Token,features=Features({'_i': 2, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': True, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'just', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 7148522813498185515, 'pos': 'ADV', 'prefix': 'j', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'RB', 'shape': 'xxxx', 'suffix': 'ust'}),id=4), Annotation(12,13,SpaceToken,features=Features({'is_space': True}),id=5), Annotation(13,17,Token,features=Features({'_i': 3, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': True, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'some', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 7000492816108906599, 'pos': 'DET', 'prefix': 's', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'DT', 'shape': 'xxxx', 'suffix': 'ome'}),id=6), Annotation(17,18,SpaceToken,features=Features({'is_space': True}),id=7), Annotation(18,25,Token,features=Features({'_i': 4, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'example', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 899618643364689362, 'pos': 'NOUN', 'prefix': 'e', 'prob': -20.0, 'rank': 18446744073709551615, 
'sentiment': 0.0, 'tag': 'NN', 'shape': 'xxxx', 'suffix': 'ple'}),id=8), Annotation(25,26,SpaceToken,features=Features({'is_space': True}),id=9), Annotation(26,30,Token,features=Features({'_i': 5, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'text', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 15099781594404091470, 'pos': 'NOUN', 'prefix': 't', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'NN', 'shape': 'xxxx', 'suffix': 'ext'}),id=10), Annotation(30,31,Token,features=Features({'_i': 6, 'is_alpha': False, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': False, 'is_oov': True, 'is_punct': True, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': '.', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 12646065887601541794, 'pos': 'PUNCT', 'prefix': '.', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': '.', 'shape': '.', 'suffix': '.'}),id=11), Annotation(31,32,SpaceToken,features=Features({'is_space': True}),id=12), Annotation(32,39,SpaceToken,features=Features({'_i': 7, 'is_alpha': False, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': False, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': True, 'is_space': True, 'is_stop': False, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': '\\n ', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 2877995683885865957, 'pos': 'SPACE', 'prefix': '\\n', 'prob': -20.0, 'rank': 
18446744073709551615, 'sentiment': 0.0, 'tag': '_SP', 'shape': '\\n ', 'suffix': ' '}),id=13), Annotation(32,87,Sentence,features=Features({'abuse': 0.28145498037338257, 'hate': 0.004390866495668888, 'profanity': 1.0, 'violent': 0.00041731575038284063, 'sexually_explicit': 0.000608261616434902, 'positive': 0.03507867455482483}),id=63), Annotation(39,42,Token,features=Features({'_i': 8, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': False, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': True, 'is_title': True, 'is_upper': False, 'lang': 'en', 'lemma': 'have', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 13124584293511252182, 'pos': 'VERB', 'prefix': 'H', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'VBZ', 'shape': 'Xxx', 'suffix': 'Has'}),id=14), Annotation(42,43,SpaceToken,features=Features({'is_space': True}),id=15), Annotation(43,44,Token,features=Features({'_i': 9, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': True, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'a', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 11901859001352538922, 'pos': 'DET', 'prefix': 'a', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'DT', 'shape': 'x', 'suffix': 'a'}),id=16), Annotation(44,45,SpaceToken,features=Features({'is_space': True}),id=17), Annotation(45,53,Token,features=Features({'_i': 10, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 
'is_space': False, 'is_stop': False, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'sentence', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 18108853898452662235, 'pos': 'NOUN', 'prefix': 's', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'NN', 'shape': 'xxxx', 'suffix': 'nce'}),id=18), Annotation(53,54,SpaceToken,features=Features({'is_space': True}),id=19), Annotation(54,58,Token,features=Features({'_i': 11, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': True, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'that', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 4380130941430378203, 'pos': 'PRON', 'prefix': 't', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'WDT', 'shape': 'xxxx', 'suffix': 'hat'}),id=20), Annotation(58,59,SpaceToken,features=Features({'is_space': True}),id=21), Annotation(59,64,Token,features=Features({'_i': 12, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'talk', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 433190433744739523, 'pos': 'VERB', 'prefix': 't', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'VBZ', 'shape': 'xxxx', 'suffix': 'lks'}),id=22), Annotation(64,65,SpaceToken,features=Features({'is_space': True}),id=23), Annotation(65,70,Token,features=Features({'_i': 13, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 
'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': True, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'about', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 942632335873952620, 'pos': 'ADP', 'prefix': 'a', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'IN', 'shape': 'xxxx', 'suffix': 'out'}),id=24), Annotation(70,71,SpaceToken,features=Features({'is_space': True}),id=25), Annotation(71,75,Token,features=Features({'_i': 14, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'shit', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 14012806951969610844, 'pos': 'NOUN', 'prefix': 's', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'NN', 'shape': 'xxxx', 'suffix': 'hit'}),id=26), Annotation(75,76,SpaceToken,features=Features({'is_space': True}),id=27), Annotation(76,78,Token,features=Features({'_i': 15, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': True, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'in', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 3002984154512732771, 'pos': 'ADP', 'prefix': 'i', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'IN', 'shape': 'xx', 'suffix': 'in'}),id=28), Annotation(78,79,SpaceToken,features=Features({'is_space': True}),id=29), Annotation(79,86,Token,features=Features({'_i': 16, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 
'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'general', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 4476931165537661438, 'pos': 'ADJ', 'prefix': 'g', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'JJ', 'shape': 'xxxx', 'suffix': 'ral'}),id=30), Annotation(86,87,Token,features=Features({'_i': 17, 'is_alpha': False, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': False, 'is_oov': True, 'is_punct': True, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': '.', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 12646065887601541794, 'pos': 'PUNCT', 'prefix': '.', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': '.', 'shape': '.', 'suffix': '.'}),id=31), Annotation(87,88,SpaceToken,features=Features({'is_space': True}),id=32), Annotation(88,95,SpaceToken,features=Features({'_i': 18, 'is_alpha': False, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': False, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': True, 'is_space': True, 'is_stop': False, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': '\\n ', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 2877995683885865957, 'pos': 'SPACE', 'prefix': '\\n', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': '_SP', 'shape': '\\n ', 'suffix': ' '}),id=33), Annotation(88,137,Sentence,features=Features({'abuse': 0.9700134992599487, 'hate': 0.013248329982161522, 'profanity': 0.0, 'violent': 0.0002684288483578712, 
'sexually_explicit': 0.0006337125669233501, 'positive': 0.037785764783620834}),id=64), Annotation(95,98,Token,features=Features({'_i': 19, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': False, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': True, 'is_title': True, 'is_upper': False, 'lang': 'en', 'lemma': 'and', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 12172435438170721471, 'pos': 'CCONJ', 'prefix': 'A', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'CC', 'shape': 'Xxx', 'suffix': 'And'}),id=34), Annotation(98,99,SpaceToken,features=Features({'is_space': True}),id=35), Annotation(99,106,Token,features=Features({'_i': 20, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': True, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'another', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 7270490914741406701, 'pos': 'PRON', 'prefix': 'a', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'DT', 'shape': 'xxxx', 'suffix': 'her'}),id=36), Annotation(106,107,SpaceToken,features=Features({'is_space': True}),id=37), Annotation(107,114,Token,features=Features({'_i': 21, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'talk', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 3577425109143670181, 'pos': 'VERB', 'prefix': 't', 'prob': -20.0, 
'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'VBG', 'shape': 'xxxx', 'suffix': 'ing'}),id=38), Annotation(114,115,SpaceToken,features=Features({'is_space': True}),id=39), Annotation(115,120,Token,features=Features({'_i': 22, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': True, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'about', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 942632335873952620, 'pos': 'ADP', 'prefix': 'a', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'IN', 'shape': 'xxxx', 'suffix': 'out'}),id=40), Annotation(120,121,SpaceToken,features=Features({'is_space': True}),id=41), Annotation(121,122,Token,features=Features({'_i': 23, 'is_alpha': False, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': False, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': '💩', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 2293946526295596255, 'pos': 'PROPN', 'prefix': '💩', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'NNP', 'shape': '💩', 'suffix': '💩'}),id=42), Annotation(122,123,Token,features=Features({'_i': 24, 'is_alpha': False, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': False, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': '💩', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 2293946526295596255, 'pos': 'NUM', 'prefix': '💩', 
'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'CD', 'shape': '💩', 'suffix': '💩'}),id=43), Annotation(123,124,Token,features=Features({'_i': 25, 'is_alpha': False, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': False, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': '💩', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 2293946526295596255, 'pos': 'NUM', 'prefix': '💩', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'CD', 'shape': '💩', 'suffix': '💩'}),id=44), Annotation(124,125,Token,features=Features({'_i': 26, 'is_alpha': False, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': False, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': '💩', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 2293946526295596255, 'pos': 'PROPN', 'prefix': '💩', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'NNP', 'shape': '💩', 'suffix': '💩'}),id=45), Annotation(125,126,SpaceToken,features=Features({'is_space': True}),id=46), Annotation(126,128,Token,features=Features({'_i': 27, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': True, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'in', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 3002984154512732771, 'pos': 'ADP', 'prefix': 'i', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'IN', 
'shape': 'xx', 'suffix': 'in'}),id=47), Annotation(128,129,SpaceToken,features=Features({'is_space': True}),id=48), Annotation(129,136,Token,features=Features({'_i': 28, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'general', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 4476931165537661438, 'pos': 'ADJ', 'prefix': 'g', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'JJ', 'shape': 'xxxx', 'suffix': 'ral'}),id=49), Annotation(136,137,Token,features=Features({'_i': 29, 'is_alpha': False, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': False, 'is_oov': True, 'is_punct': True, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': '.', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 12646065887601541794, 'pos': 'PUNCT', 'prefix': '.', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': '.', 'shape': '.', 'suffix': '.'}),id=50), Annotation(137,138,SpaceToken,features=Features({'is_space': True}),id=51), Annotation(138,142,Token,features=Features({'_i': 30, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': False, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': True, 'is_space': False, 'is_stop': True, 'is_title': True, 'is_upper': False, 'lang': 'en', 'lemma': 'this', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 12943039165150086467, 'pos': 'DET', 'prefix': 'T', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 
'tag': 'DT', 'shape': 'Xxxx', 'suffix': 'his'}),id=52), Annotation(138,158,Sentence,features=Features({'abuse': 0.9941347241401672, 'hate': 0.07159119844436646, 'profanity': 1.0, 'violent': 0.00019487277313601226, 'sexually_explicit': 0.0008509044419042766, 'positive': 0.05797139182686806}),id=65), Annotation(142,143,SpaceToken,features=Features({'is_space': True}),id=53), Annotation(143,146,Token,features=Features({'_i': 31, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'guy', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 13296119330568554208, 'pos': 'NOUN', 'prefix': 'g', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'NN', 'shape': 'xxx', 'suffix': 'guy'}),id=54), Annotation(146,147,SpaceToken,features=Features({'is_space': True}),id=55), Annotation(147,149,Token,features=Features({'_i': 32, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': True, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'be', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 3411606890003347522, 'pos': 'AUX', 'prefix': 'i', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'VBZ', 'shape': 'xx', 'suffix': 'is'}),id=56), Annotation(149,150,SpaceToken,features=Features({'is_space': True}),id=57), Annotation(150,151,Token,features=Features({'_i': 33, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 
'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': True, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'a', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 11901859001352538922, 'pos': 'DET', 'prefix': 'a', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'DT', 'shape': 'x', 'suffix': 'a'}),id=58), Annotation(151,152,SpaceToken,features=Features({'is_space': True}),id=59), Annotation(152,157,Token,features=Features({'_i': 34, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'moron', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 4161779678360877177, 'pos': 'NOUN', 'prefix': 'm', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'NN', 'shape': 'xxxx', 'suffix': 'ron'}),id=60), Annotation(157,158,Token,features=Features({'_i': 35, 'is_alpha': False, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': False, 'is_oov': True, 'is_punct': True, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': '.', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 12646065887601541794, 'pos': 'PUNCT', 'prefix': '.', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': '.', 'shape': '.', 'suffix': '.'}),id=61)])})" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "docs[1]" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", "
\n", "\n", "
" ], "text/plain": [ "Document(What a stupid bitch she is.,features=Features({'abuse': 0.9487276077270508, 'hate': 0.07640768587589264, 'profanity': 1.0, 'violent': 0.00017197855049744248, 'sexually_explicit': 0.0007524627144448459, 'positive': 0.051224932074546814}),anns={'': AnnotationSet([Annotation(0,4,Token,features=Features({'_i': 0, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': False, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': True, 'is_space': False, 'is_stop': True, 'is_title': True, 'is_upper': False, 'lang': 'en', 'lemma': 'what', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 5931147113347055926, 'pos': 'PRON', 'prefix': 'W', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'WP', 'shape': 'Xxxx', 'suffix': 'hat'}),id=0), Annotation(0,27,Sentence,features=Features({'abuse': 0.9487276077270508, 'hate': 0.07640768587589264, 'profanity': 1.0, 'violent': 0.00017197855049744248, 'sexually_explicit': 0.0007524627144448459, 'positive': 0.051224932074546814}),id=12), Annotation(4,5,SpaceToken,features=Features({'is_space': True}),id=1), Annotation(5,6,Token,features=Features({'_i': 1, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': True, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'a', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 11901859001352538922, 'pos': 'DET', 'prefix': 'a', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'DT', 'shape': 'x', 'suffix': 'a'}),id=2), Annotation(6,7,SpaceToken,features=Features({'is_space': True}),id=3), Annotation(7,13,Token,features=Features({'_i': 2, 'is_alpha': True, 'is_bracket': False, 
'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'stupid', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 9673440059141067876, 'pos': 'ADJ', 'prefix': 's', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'JJ', 'shape': 'xxxx', 'suffix': 'pid'}),id=4), Annotation(13,14,SpaceToken,features=Features({'is_space': True}),id=5), Annotation(14,19,Token,features=Features({'_i': 3, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'bitch', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 10512814226314425837, 'pos': 'NOUN', 'prefix': 'b', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'NN', 'shape': 'xxxx', 'suffix': 'tch'}),id=6), Annotation(19,20,SpaceToken,features=Features({'is_space': True}),id=7), Annotation(20,23,Token,features=Features({'_i': 4, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': True, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'she', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 6740321247510922449, 'pos': 'PRON', 'prefix': 's', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'PRP', 'shape': 'xxx', 'suffix': 'she'}),id=8), Annotation(23,24,SpaceToken,features=Features({'is_space': True}),id=9), 
Annotation(24,26,Token,features=Features({'_i': 5, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': True, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'be', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 3411606890003347522, 'pos': 'AUX', 'prefix': 'i', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'VBZ', 'shape': 'xx', 'suffix': 'is'}),id=10), Annotation(26,27,Token,features=Features({'_i': 6, 'is_alpha': False, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': False, 'is_oov': True, 'is_punct': True, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': '.', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 12646065887601541794, 'pos': 'PUNCT', 'prefix': '.', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': '.', 'shape': '.', 'suffix': '.'}),id=11)])})" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "docs[2]" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", "
\n", "\n", "
" ], "text/plain": [ "Document(I am going to kill you, asshole!,features=Features({'abuse': 0.9973989725112915, 'hate': 0.03146960213780403, 'profanity': 1.0, 'violent': 0.9915707111358643, 'sexually_explicit': 0.003027835162356496, 'positive': 0.05033063143491745}),anns={'': AnnotationSet([Annotation(0,1,Token,features=Features({'_i': 0, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': False, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': True, 'is_space': False, 'is_stop': True, 'is_title': True, 'is_upper': True, 'lang': 'en', 'lemma': 'I', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 4690420944186131903, 'pos': 'PRON', 'prefix': 'I', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'PRP', 'shape': 'X', 'suffix': 'I'}),id=0), Annotation(0,32,Sentence,features=Features({'abuse': 0.9973989725112915, 'hate': 0.03146960213780403, 'profanity': 1.0, 'violent': 0.9915707111358643, 'sexually_explicit': 0.003027835162356496, 'positive': 0.05033063143491745}),id=15), Annotation(1,2,SpaceToken,features=Features({'is_space': True}),id=1), Annotation(2,4,Token,features=Features({'_i': 1, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': True, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'be', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 959164148857638496, 'pos': 'AUX', 'prefix': 'a', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'VBP', 'shape': 'xx', 'suffix': 'am'}),id=2), Annotation(4,5,SpaceToken,features=Features({'is_space': True}),id=3), Annotation(5,10,Token,features=Features({'_i': 2, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 
'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'go', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 14242014708739787450, 'pos': 'VERB', 'prefix': 'g', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'VBG', 'shape': 'xxxx', 'suffix': 'ing'}),id=4), Annotation(10,11,SpaceToken,features=Features({'is_space': True}),id=5), Annotation(11,13,Token,features=Features({'_i': 3, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': True, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'to', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 3791531372978436496, 'pos': 'PART', 'prefix': 't', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'TO', 'shape': 'xx', 'suffix': 'to'}),id=6), Annotation(13,14,SpaceToken,features=Features({'is_space': True}),id=7), Annotation(14,18,Token,features=Features({'_i': 4, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'kill', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 3883960749573218104, 'pos': 'VERB', 'prefix': 'k', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'VB', 'shape': 'xxxx', 'suffix': 'ill'}),id=8), Annotation(18,19,SpaceToken,features=Features({'is_space': True}),id=9), 
Annotation(19,22,Token,features=Features({'_i': 5, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': True, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'you', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 7624161793554793053, 'pos': 'PRON', 'prefix': 'y', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'PRP', 'shape': 'xxx', 'suffix': 'you'}),id=10), Annotation(22,23,Token,features=Features({'_i': 6, 'is_alpha': False, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': False, 'is_oov': True, 'is_punct': True, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': ',', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 2593208677638477497, 'pos': 'PUNCT', 'prefix': ',', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': ',', 'shape': ',', 'suffix': ','}),id=11), Annotation(23,24,SpaceToken,features=Features({'is_space': True}),id=12), Annotation(24,31,Token,features=Features({'_i': 7, 'is_alpha': True, 'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': True, 'is_oov': True, 'is_punct': False, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': 'asshole', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 18372025554415990910, 'pos': 'ADJ', 'prefix': 'a', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': 'JJ', 'shape': 'xxxx', 'suffix': 'ole'}),id=13), Annotation(31,32,Token,features=Features({'_i': 8, 'is_alpha': False, 
'is_bracket': False, 'is_currency': False, 'is_digit': False, 'is_left_punct': False, 'is_lower': False, 'is_oov': True, 'is_punct': True, 'is_quote': False, 'is_right_punct': False, 'is_sent_start': False, 'is_space': False, 'is_stop': False, 'is_title': False, 'is_upper': False, 'lang': 'en', 'lemma': '!', 'like_email': False, 'like_num': False, 'like_url': False, 'orth': 17494803046312582752, 'pos': 'PUNCT', 'prefix': '!', 'prob': -20.0, 'rank': 18446744073709551615, 'sentiment': 0.0, 'tag': '.', 'shape': '!', 'suffix': '!'}),id=14)])})" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "docs[3]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Notebook last updated" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NB last updated with gatenlp version 1.0.8a1\n" ] } ], "source": [ "import gatenlp\n", "print(\"NB last updated with gatenlp version\", gatenlp.__version__)" ] } ], "metadata": { "kernelspec": { "display_name": "gatenlp-37", "language": "python", "name": "gatenlp-37" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.9" } }, "nbformat": 4, "nbformat_minor": 4 }