{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Stanza pipeline\n", "\n", "If `gatenlp` has been installed with the stanza extra (`pip install gatenlp[stanza]` or `pip install gatenlp[all]`), you can run a Stanford Stanza pipeline on a document and get the result as `gatenlp` annotations.\n", "\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Stanza version: 1.3.0\n" ] } ], "source": [ "from gatenlp import Document\n", "from gatenlp.lib_stanza import AnnStanza\n", "import stanza\n", "\n", "print(\"Stanza version:\", stanza.__version__)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9660b72b145c4be888b8595d8c4f67ed", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.3.0.json: 0%| …" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "2022-06-28 00:02:18,131|INFO|stanza|Downloading default packages for language: en (English)...\n", "2022-06-28 00:02:22,054|INFO|stanza|File exists: /data/johann/stanza_resources/en/default.zip.\n", "2022-06-28 00:02:33,832|INFO|stanza|Finished downloading models and saved to /data/johann/stanza_resources.\n" ] } ], "source": [ "# In order to use the English pipeline with stanza, the model has to get downloaded first\n", "stanza.download('en')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "