dkagramanyan
/

nl_core_news_sm_spacy_endpoint

Token Classification

spaCy

Dutch

Eval Results

Inference Endpoints

Model card Files Files and versions Community

David Kagramanyan committed on Oct 5, 2023

Commit

a3c618b

1 Parent(s): cae9ed3

handler

Browse files

Files changed (2) hide show

deploy_endpoint_fix_spacy.ipynb +182 -0
handler.py +42 -0

deploy_endpoint_fix_spacy.ipynb ADDED Viewed

	@@ -0,0 +1,182 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 91,
+   "outputs": [],
+   "source": [
+    "from typing import Any, Dict, List\n",
+    "\n",
+    "class EndpointHandler():\n",
+    "    def __init__(\n",
+    "            self,\n",
+    "            path: str,\n",
+    "    ):\n",
+    "        # self.tagger = SequenceTagger.load(os.path.join(path,\"pytorch_model.bin\"))\n",
+    "        self.nlp = spacy.load(\".\")\n",
+    "\n",
+    "    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:\n",
+    "        \"\"\"\n",
+    "        Args:\n",
+    "            inputs (:obj:`str`):\n",
+    "                a string containing some text\n",
+    "        Return:\n",
+    "            A :obj:`list`:. The object returned should be like [{\"entity_group\": \"XXX\", \"word\": \"some word\", \"start\": 3, \"end\": 6, \"score\": 0.82}] containing :\n",
+    "                - \"entity_group\": A string representing what the entity is.\n",
+    "                - \"word\": A substring of the original string that was detected as an entity.\n",
+    "                - \"start\": the offset within `input` leading to `answer`. context[start:stop] == word\n",
+    "                - \"end\": the ending offset within `input` leading to `answer`. context[start:stop] === word\n",
+    "                - \"score\": A score between 0 and 1 describing how confident the model is for this entity.\n",
+    "        \"\"\"\n",
+    "        inputs = data.pop(\"inputs\", data)\n",
+    "\n",
+    "        doc=self.nlp(inputs)\n",
+    "\n",
+    "        entities = []\n",
+    "        for span in doc.ents:\n",
+    "            if len(span.ents) == 0:\n",
+    "                continue\n",
+    "            current_entity = {\n",
+    "                \"entity_group\": span.label_,\n",
+    "                \"word\": span.text,\n",
+    "                \"start\": span.start_char,\n",
+    "                \"end\": span.end_char,\n",
+    "                # \"score\": span.score,\n",
+    "            }\n",
+    "            entities.append(current_entity)\n",
+    "            \n",
+    "        return entities\n"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2023-10-05T11:56:54.341188400Z",
+     "start_time": "2023-10-05T11:56:54.327093400Z"
+    }
+   },
+   "id": "af1d783960762219"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 95,
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "non_holiday_pred [{'entity_group': 'PERSON', 'word': 'George Washington', 'start': 0, 'end': 17}, {'entity_group': 'GPE', 'word': 'Washington', 'start': 28, 'end': 38}]\n"
+     ]
+    }
+   ],
+   "source": [
+    "my_handler = EndpointHandler(path=\".\")\n",
+    "\n",
+    "# prepare sample payload\n",
+    "non_holiday_payload = {\"inputs\": \"George Washington ging naar Washington\"}\n",
+    "\n",
+    "\n",
+    "# test the handler\n",
+    "non_holiday_pred=my_handler(non_holiday_payload)\n",
+    "\n",
+    "\n",
+    "# show results\n",
+    "print(\"non_holiday_pred\", non_holiday_pred)\n",
+    "\n"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2023-10-05T11:57:36.320257400Z",
+     "start_time": "2023-10-05T11:57:34.860659500Z"
+    }
+   },
+   "id": "a12c4a4792afc707"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "outputs": [],
+   "source": [
+    "import spacy\n",
+    "\n",
+    "nlp = spacy.load(\".\")\n",
+    "# nlp = spacy.load('model')"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2023-10-05T11:45:00.500755Z",
+     "start_time": "2023-10-05T11:44:59.084649300Z"
+    }
+   },
+   "id": "e8f6555c52db68bb"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 86,
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "U.K. 27 31 PERSON\n",
+      "1 45 46 CARDINAL\n",
+      "Armenia 74 81 PERSON\n"
+     ]
+    }
+   ],
+   "source": [
+    "txt=\"Apple is looking at buying U.K. startup for $1 billion  and selling it to Armenia\"\n",
+    "doc = nlp(txt)\n",
+    "\n",
+    "for ent in doc.ents:\n",
+    "    print(ent.text, ent.start_char, ent.end_char, ent.label_)"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2023-10-05T11:54:40.099907400Z",
+     "start_time": "2023-10-05T11:54:40.073977200Z"
+    }
+   },
+   "id": "301895c94d69a22c"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "outputs": [],
+   "source": [
+    "model = spacy.load(\"en_core_web_sm\")"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2023-10-05T11:20:01.608708400Z",
+     "start_time": "2023-10-05T11:20:01.038168700Z"
+    }
+   },
+   "id": "7136bbcc5a994ac"
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "name": "torch",
+   "language": "python",
+   "display_name": "torch"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

handler.py ADDED Viewed

	@@ -0,0 +1,42 @@

+from typing import Any, Dict, List
+import spacy
+class EndpointHandler():
+    def __init__(
+            self,
+            path: str,
+    ):
+        # self.tagger = SequenceTagger.load(os.path.join(path,"pytorch_model.bin"))
+        self.nlp = spacy.load(".")
+    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
+        """
+        Args:
+            inputs (:obj:`str`):
+                a string containing some text
+        Return:
+            A :obj:`list`:. The object returned should be like [{"entity_group": "XXX", "word": "some word", "start": 3, "end": 6, "score": 0.82}] containing :
+                - "entity_group": A string representing what the entity is.
+                - "word": A substring of the original string that was detected as an entity.
+                - "start": the offset within `input` leading to `answer`. context[start:stop] == word
+                - "end": the ending offset within `input` leading to `answer`. context[start:stop] === word
+                - "score": A score between 0 and 1 describing how confident the model is for this entity.
+        """
+        inputs = data.pop("inputs", data)
+        doc=self.nlp(inputs)
+        entities = []
+        for span in doc.ents:
+            if len(span.ents) == 0:
+                continue
+            current_entity = {
+                "entity_group": span.label_,
+                "word": span.text,
+                "start": span.start_char,
+                "end": span.end_char,
+                # "score": span.score,
+            }
+            entities.append(current_entity)
+        return entities