David Kagramanyan committed on
Commit
a3c618b
1 Parent(s): cae9ed3
Files changed (2) hide show
  1. deploy_endpoint_fix_spacy.ipynb +182 -0
  2. handler.py +42 -0
deploy_endpoint_fix_spacy.ipynb ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 91,
6
+ "outputs": [],
7
+ "source": [
8
+ "from typing import Any, Dict, List\n",
9
+ "\n",
10
+ "class EndpointHandler():\n",
11
+ " def __init__(\n",
12
+ " self,\n",
13
+ " path: str,\n",
14
+ " ):\n",
15
+ " # self.tagger = SequenceTagger.load(os.path.join(path,\"pytorch_model.bin\"))\n",
16
+ " self.nlp = spacy.load(\".\")\n",
17
+ "\n",
18
+ " def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:\n",
19
+ " \"\"\"\n",
20
+ " Args:\n",
21
+ " inputs (:obj:`str`):\n",
22
+ " a string containing some text\n",
23
+ " Return:\n",
24
+ " A :obj:`list`:. The object returned should be like [{\"entity_group\": \"XXX\", \"word\": \"some word\", \"start\": 3, \"end\": 6, \"score\": 0.82}] containing :\n",
25
+ " - \"entity_group\": A string representing what the entity is.\n",
26
+ " - \"word\": A substring of the original string that was detected as an entity.\n",
27
+ " - \"start\": the offset within `input` leading to `answer`. context[start:stop] == word\n",
28
+ " - \"end\": the ending offset within `input` leading to `answer`. context[start:stop] === word\n",
29
+ " - \"score\": A score between 0 and 1 describing how confident the model is for this entity.\n",
30
+ " \"\"\"\n",
31
+ " inputs = data.pop(\"inputs\", data)\n",
32
+ "\n",
33
+ " doc=self.nlp(inputs)\n",
34
+ "\n",
35
+ " entities = []\n",
36
+ " for span in doc.ents:\n",
37
+ " if len(span.ents) == 0:\n",
38
+ " continue\n",
39
+ " current_entity = {\n",
40
+ " \"entity_group\": span.label_,\n",
41
+ " \"word\": span.text,\n",
42
+ " \"start\": span.start_char,\n",
43
+ " \"end\": span.end_char,\n",
44
+ " # \"score\": span.score,\n",
45
+ " }\n",
46
+ " entities.append(current_entity)\n",
47
+ " \n",
48
+ " return entities\n"
49
+ ],
50
+ "metadata": {
51
+ "collapsed": false,
52
+ "ExecuteTime": {
53
+ "end_time": "2023-10-05T11:56:54.341188400Z",
54
+ "start_time": "2023-10-05T11:56:54.327093400Z"
55
+ }
56
+ },
57
+ "id": "af1d783960762219"
58
+ },
59
+ {
60
+ "cell_type": "code",
61
+ "execution_count": 95,
62
+ "outputs": [
63
+ {
64
+ "name": "stdout",
65
+ "output_type": "stream",
66
+ "text": [
67
+ "non_holiday_pred [{'entity_group': 'PERSON', 'word': 'George Washington', 'start': 0, 'end': 17}, {'entity_group': 'GPE', 'word': 'Washington', 'start': 28, 'end': 38}]\n"
68
+ ]
69
+ }
70
+ ],
71
+ "source": [
72
+ "my_handler = EndpointHandler(path=\".\")\n",
73
+ "\n",
74
+ "# prepare sample payload\n",
75
+ "non_holiday_payload = {\"inputs\": \"George Washington ging naar Washington\"}\n",
76
+ "\n",
77
+ "\n",
78
+ "# test the handler\n",
79
+ "non_holiday_pred=my_handler(non_holiday_payload)\n",
80
+ "\n",
81
+ "\n",
82
+ "# show results\n",
83
+ "print(\"non_holiday_pred\", non_holiday_pred)\n",
84
+ "\n"
85
+ ],
86
+ "metadata": {
87
+ "collapsed": false,
88
+ "ExecuteTime": {
89
+ "end_time": "2023-10-05T11:57:36.320257400Z",
90
+ "start_time": "2023-10-05T11:57:34.860659500Z"
91
+ }
92
+ },
93
+ "id": "a12c4a4792afc707"
94
+ },
95
+ {
96
+ "cell_type": "code",
97
+ "execution_count": 50,
98
+ "outputs": [],
99
+ "source": [
100
+ "import spacy\n",
101
+ "\n",
102
+ "nlp = spacy.load(\".\")\n",
103
+ "# nlp = spacy.load('model')"
104
+ ],
105
+ "metadata": {
106
+ "collapsed": false,
107
+ "ExecuteTime": {
108
+ "end_time": "2023-10-05T11:45:00.500755Z",
109
+ "start_time": "2023-10-05T11:44:59.084649300Z"
110
+ }
111
+ },
112
+ "id": "e8f6555c52db68bb"
113
+ },
114
+ {
115
+ "cell_type": "code",
116
+ "execution_count": 86,
117
+ "outputs": [
118
+ {
119
+ "name": "stdout",
120
+ "output_type": "stream",
121
+ "text": [
122
+ "U.K. 27 31 PERSON\n",
123
+ "1 45 46 CARDINAL\n",
124
+ "Armenia 74 81 PERSON\n"
125
+ ]
126
+ }
127
+ ],
128
+ "source": [
129
+ "txt=\"Apple is looking at buying U.K. startup for $1 billion and selling it to Armenia\"\n",
130
+ "doc = nlp(txt)\n",
131
+ "\n",
132
+ "for ent in doc.ents:\n",
133
+ " print(ent.text, ent.start_char, ent.end_char, ent.label_)"
134
+ ],
135
+ "metadata": {
136
+ "collapsed": false,
137
+ "ExecuteTime": {
138
+ "end_time": "2023-10-05T11:54:40.099907400Z",
139
+ "start_time": "2023-10-05T11:54:40.073977200Z"
140
+ }
141
+ },
142
+ "id": "301895c94d69a22c"
143
+ },
144
+ {
145
+ "cell_type": "code",
146
+ "execution_count": 30,
147
+ "outputs": [],
148
+ "source": [
149
+ "model = spacy.load(\"en_core_web_sm\")"
150
+ ],
151
+ "metadata": {
152
+ "collapsed": false,
153
+ "ExecuteTime": {
154
+ "end_time": "2023-10-05T11:20:01.608708400Z",
155
+ "start_time": "2023-10-05T11:20:01.038168700Z"
156
+ }
157
+ },
158
+ "id": "7136bbcc5a994ac"
159
+ }
160
+ ],
161
+ "metadata": {
162
+ "kernelspec": {
163
+ "name": "torch",
164
+ "language": "python",
165
+ "display_name": "torch"
166
+ },
167
+ "language_info": {
168
+ "codemirror_mode": {
169
+ "name": "ipython",
170
+ "version": 2
171
+ },
172
+ "file_extension": ".py",
173
+ "mimetype": "text/x-python",
174
+ "name": "python",
175
+ "nbconvert_exporter": "python",
176
+ "pygments_lexer": "ipython2",
177
+ "version": "2.7.6"
178
+ }
179
+ },
180
+ "nbformat": 4,
181
+ "nbformat_minor": 5
182
+ }
handler.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Dict, List
2
+ import spacy
3
+
4
class EndpointHandler:
    """Callable request handler that runs a spaCy pipeline and returns its entities.

    The pipeline is loaded once at construction time and reused for every call.
    """

    def __init__(
        self,
        path: str,
    ):
        """Load the spaCy pipeline.

        Args:
            path: Directory containing the serialized spaCy pipeline. An empty
                value falls back to the current working directory (".").
        """
        # Honor the directory handed in by the caller instead of depending on
        # the process working directory; "." is kept only as a fallback.
        self.nlp = spacy.load(path or ".")

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Run the pipeline on the request text and return the detected entities.

        Args:
            data: Request payload. The text is read from ``data["inputs"]``; if
                that key is absent, ``data`` itself is handed to the pipeline.
                The payload is left unmodified.

        Returns:
            One dict per entity in ``doc.ents``, in the order the pipeline
            yields them, e.g.
            ``[{"entity_group": "XXX", "word": "some word", "start": 3, "end": 6}]``:
              - "entity_group": the entity label (``span.label_``).
              - "word": the entity text (``span.text``).
              - "start": start character offset (``span.start_char``).
              - "end": end character offset (``span.end_char``).
            No "score" key is emitted.
        """
        # Read without popping so the same payload can be passed again.
        inputs = data.get("inputs", data)

        doc = self.nlp(inputs)

        return [
            {
                "entity_group": span.label_,
                "word": span.text,
                "start": span.start_char,
                "end": span.end_char,
            }
            for span in doc.ents
        ]