Lamp Socrates committed on
Commit
4ad296d
1 Parent(s): c6d6e27

Latest commit

Browse files
Files changed (4) hide show
  1. app.py +97 -17
  2. locust_stress.py +7 -0
  3. requirements.txt +2 -1
  4. stresstest_ner_service.ipynb +301 -0
app.py CHANGED
@@ -1,51 +1,131 @@
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  from transformers import pipeline
3
  from fastapi import FastAPI
4
  from pydantic import BaseModel
5
  from typing import List, Dict
6
 
7
- # Initialize the NER pipeline
8
- ner_model = pipeline("ner", grouped_entities=True)
9
-
10
  # Define the FastAPI app
11
  app = FastAPI()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- # Define the request and response models for the API
14
- class NERRequest(BaseModel):
15
- text: str
16
 
17
  class Entity(BaseModel):
18
- entity_group: str
 
19
  start: int
20
  end: int
21
- score: float
22
  word: str
23
 
24
  class NERResponse(BaseModel):
25
  entities: List[Entity]
26
 
 
 
 
 
 
 
 
 
27
  @app.post("/ner", response_model=NERResponse)
28
  def get_entities(request: NERRequest):
 
 
29
  # Use the NER model to detect entities
30
- entities = ner_model(request.text)
 
31
  # Convert entities to the response model
32
  response_entities = [Entity(**entity) for entity in entities]
 
33
  return NERResponse(entities=response_entities)
34
 
 
 
 
 
 
 
 
 
 
 
 
35
  # Define the Gradio interface function
36
  def ner_demo(text):
37
- entities = ner_model(text)
38
- return {"entities": entities}
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
 
40
  # Create the Gradio interface
41
- iface = gr.Interface(
42
  fn=ner_demo,
43
  inputs=gr.Textbox(lines=10, placeholder="Enter text here..."),
44
- outputs=gr.JSON(),
45
- title="Named Entity Recognition",
46
- description="Enter text to extract named entities using a NER model."
 
47
  )
48
 
49
- # Launch the Gradio interface
50
- iface.launch(share=True)
 
 
 
 
 
 
 
 
 
51
 
 
1
+ import uvicorn
2
+ import threading
3
+ from typing import Optional
4
+ from transformers import pipeline
5
+ from transformers import AutoTokenizer, AutoModelForTokenClassification
6
+ import pandas as pd
7
+ #import datasets
8
+ from pprint import pprint
9
+
10
  import gradio as gr
11
  from transformers import pipeline
12
  from fastapi import FastAPI
13
  from pydantic import BaseModel
14
  from typing import List, Dict
15
 
 
 
 
16
  # Define the FastAPI app
17
  app = FastAPI()
18
+ model_cache: Optional[object] = None
19
+
20
+ def load_model():
21
+
22
+ tokenizer = AutoTokenizer.from_pretrained("LampOfSocrates/bert-cased-plodcw-sourav")
23
+ model = AutoModelForTokenClassification.from_pretrained("LampOfSocrates/bert-cased-plodcw-sourav")
24
+ # Mapping labels
25
+ id2label = model.config.id2label
26
+ # Print the label mapping
27
+ print(f"Can recognise the following labels {id2label}")
28
+
29
+ # Load the NER model and tokenizer from Hugging Face
30
+ #ner_pipeline = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")
31
+ model = pipeline("ner", model=model, tokenizer = tokenizer)
32
+ return model
33
+
34
+ def load_plod_cw_dataset():
35
+ from datasets import load_dataset
36
+ dataset = load_dataset("surrey-nlp/PLOD-CW")
37
+ return dataset
38
+
39
+ def get_cached_model():
40
+ global model_cache
41
+ if model_cache is None:
42
+ model_cache = load_model()
43
+ return model_cache
44
+
45
+ # Cache the model when the server starts
46
+ model = get_cached_model()
47
+
48
 
 
 
 
49
 
50
  class Entity(BaseModel):
51
+ entity: str
52
+ score: float
53
  start: int
54
  end: int
 
55
  word: str
56
 
57
  class NERResponse(BaseModel):
58
  entities: List[Entity]
59
 
60
+ class NERRequest(BaseModel):
61
+ text: str
62
+
63
+ @app.get("/hello")
64
+ def read_root():
65
+ return {"message": "Hello, World!"}
66
+
67
+
68
  @app.post("/ner", response_model=NERResponse)
69
  def get_entities(request: NERRequest):
70
+ print(request)
71
+ model = get_cached_model()
72
  # Use the NER model to detect entities
73
+ entities = model(request.text)
74
+ print(entities[0].keys())
75
  # Convert entities to the response model
76
  response_entities = [Entity(**entity) for entity in entities]
77
+ print(response_entities[0])
78
  return NERResponse(entities=response_entities)
79
 
80
+ def get_color_for_label(label: str) -> str:
81
+ # Define a mapping of labels to colors
82
+ color_mapping = {
83
+ "I-LF": "red",
84
+ "B-AC": "blue",
85
+ "LOC": "green",
86
+ # Add more labels and colors as needed
87
+ }
88
+ return color_mapping.get(label, "black") # Default to black if label not found
89
+
90
+
91
  # Define the Gradio interface function
92
  def ner_demo(text):
93
+ model = get_cached_model()
94
+ entities = model(text)
95
+ #return {"entities": entities}
96
+
97
+ # Color code the entities
98
+ color_coded_text = text
99
+ for entity in entities:
100
+ #print(entity)
101
+ start, end, label = entity["start"], entity["end"], entity["entity"]
102
+ color = get_color_for_label(label) # You need to define this function
103
+ entity_text = text[start:end]
104
+ colored_entity = f'<span style="color: {color}; font-weight: bold;">{entity_text}</span>'
105
+ color_coded_text = color_coded_text[:start] + colored_entity + color_coded_text[end:]
106
+
107
+ return color_coded_text
108
 
109
+ PROJECT_INTRO = "This is a HF Spaces hosted Gradio App built by NLP Group 27 . The model has been trained on surrey-nlp/PLOD-CW dataset"
110
  # Create the Gradio interface
111
+ demo = gr.Interface(
112
  fn=ner_demo,
113
  inputs=gr.Textbox(lines=10, placeholder="Enter text here..."),
114
+ outputs="html",
115
+ #outputs=gr.JSON(),
116
+ title="Named Entity Recognition on PLOD-CW ",
117
+ description=f"{PROJECT_INTRO}\n\nEnter text to extract named entities using a NER model."
118
  )
119
 
120
+ # Function to run FastAPI
121
+ def run_fastapi():
122
+ uvicorn.run(app, host="0.0.0.0", port=8000)
123
+
124
+ # Function to run Gradio
125
+ def run_gradio():
126
+ demo.launch(server_name="0.0.0.0", server_port=7860)
127
+
128
+ # Run both servers in separate threads
129
+ threading.Thread(target=run_fastapi).start()
130
+ threading.Thread(target=run_gradio).start()
131
 
locust_stress.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from locust import HttpUser, task
2
+
3
+ class HelloWorldUser(HttpUser):
4
+ @task
5
+ def hello_world(self):
6
+ self.client.get("/hello")
7
+ self.client.get("/world")
requirements.txt CHANGED
@@ -4,4 +4,5 @@ fastapi
4
  gradio
5
  transformers
6
  pydantic
7
- uvicorn
 
 
4
  gradio
5
  transformers
6
  pydantic
7
+ uvicorn
8
+ urllib3~=2.0
stresstest_ner_service.ipynb ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 52,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "from pydantic import BaseModel\n",
10
+ "from typing import List\n",
11
+ "import requests\n",
12
+ "\n",
13
+ "class NERRequest(BaseModel):\n",
14
+ " text: str\n",
15
+ "\n",
16
+ "class Entity(BaseModel):\n",
17
+ " entity: str\n",
18
+ " score: float\n",
19
+ " start: int\n",
20
+ " end: int\n",
21
+ " word: str\n",
22
+ "\n",
23
+ "class NERResponse(BaseModel):\n",
24
+ " entities: List[Entity]\n",
25
+ "\n"
26
+ ]
27
+ },
28
+ {
29
+ "cell_type": "markdown",
30
+ "metadata": {},
31
+ "source": [
32
+ "## Single Request"
33
+ ]
34
+ },
35
+ {
36
+ "cell_type": "code",
37
+ "execution_count": 53,
38
+ "metadata": {},
39
+ "outputs": [
40
+ {
41
+ "name": "stdout",
42
+ "output_type": "stream",
43
+ "text": [
44
+ "entities=[Entity(entity='B-O', score=0.7138232588768005, start=0, end=5, word='Hello'), Entity(entity='B-O', score=0.8602567911148071, start=5, end=6, word=','), Entity(entity='B-O', score=0.7045656442642212, start=7, end=12, word='world'), Entity(entity='B-O', score=0.9600160717964172, start=12, end=13, word='!')]\n"
45
+ ]
46
+ }
47
+ ],
48
+ "source": [
49
+ "NER_API_URL = \"http://127.0.0.1:8000\"\n",
50
+ "\n",
51
+ "# URL of the FastAPI server\n",
52
+ "url = f\"{NER_API_URL}/ner\"\n",
53
+ "\n",
54
+ "# Create an instance of NERRequest\n",
55
+ "request_data = NERRequest(text=\"Hello, world!\") # Pick from PLOD-CW\n",
56
+ "\n",
57
+ "# Convert the request data to a JSON string\n",
58
+ "request_json = request_data.json()\n",
59
+ "\n",
60
+ "# Make the POST request\n",
61
+ "response = requests.post(url, data=request_json, headers={\"Content-Type\": \"application/json\"})\n",
62
+ "\n",
63
+ "\n",
64
+ "# Check if the request was successful\n",
65
+ "if response.status_code == 200:\n",
66
+ " # Parse the response JSON to the NERResponse model\n",
67
+ " ner_response = NERResponse(**response.json())\n",
68
+ " print(ner_response)\n",
69
+ "else:\n",
70
+ " print(f\"Request failed with status code {response.status_code}\")"
71
+ ]
72
+ },
73
+ {
74
+ "cell_type": "markdown",
75
+ "metadata": {},
76
+ "source": [
77
+ "## Single Request to /hello Endpoint"
78
+ ]
79
+ },
80
+ {
81
+ "cell_type": "code",
82
+ "execution_count": 54,
83
+ "metadata": {},
84
+ "outputs": [
85
+ {
86
+ "name": "stdout",
87
+ "output_type": "stream",
88
+ "text": [
89
+ "{'message': 'Hello, World!'}\n"
90
+ ]
91
+ }
92
+ ],
93
+ "source": [
94
+ "import requests\n",
95
+ "\n",
96
+ "# URL of the FastAPI server\n",
97
+ "url = f\"{NER_API_URL}/hello\"\n",
98
+ "\n",
99
+ "# Make the GET request\n",
100
+ "response = requests.get(url)\n",
101
+ "\n",
102
+ "# Check if the request was successful\n",
103
+ "if response.status_code == 200:\n",
104
+ " # Print the response JSON\n",
105
+ " print(response.json())\n",
106
+ "else:\n",
107
+ " print(f\"Request failed with status code {response.status_code}\")"
108
+ ]
109
+ },
110
+ {
111
+ "cell_type": "markdown",
112
+ "metadata": {},
113
+ "source": [
114
+ "## Multiple requests to /ner endpoint"
115
+ ]
116
+ },
117
+ {
118
+ "cell_type": "code",
119
+ "execution_count": 55,
120
+ "metadata": {},
121
+ "outputs": [
122
+ {
123
+ "name": "stdout",
124
+ "output_type": "stream",
125
+ "text": [
126
+ " Time Taken\n",
127
+ "count 100.000000\n",
128
+ "mean 0.017934\n",
129
+ "std 0.008877\n",
130
+ "min 0.000000\n",
131
+ "25% 0.012228\n",
132
+ "50% 0.017015\n",
133
+ "75% 0.021086\n",
134
+ "max 0.047817\n"
135
+ ]
136
+ }
137
+ ],
138
+ "source": [
139
+ "import threading\n",
140
+ "import time\n",
141
+ "import pandas as pd\n",
142
+ "from gradio_client import Client\n",
143
+ "\n",
144
+ "# List to store the time taken for each request\n",
145
+ "times = []\n",
146
+ "\n",
147
+ "# Lock to ensure thread-safe operations on the times list\n",
148
+ "lock = threading.Lock()\n",
149
+ "\n",
150
+ "# Function to send a request to the API and measure time taken\n",
151
+ "def send_request():\n",
152
+ " start_time = time.time()\n",
153
+ "\n",
154
+ " # Create an instance of NERRequest\n",
155
+ " request_data = NERRequest(text=\"Hello, world!\")\n",
156
+ "\n",
157
+ " # Convert the request data to a JSON string\n",
158
+ " request_json = request_data.json()\n",
159
+ "\n",
160
+ " # Make the POST request\n",
161
+ " response = requests.post(url, data=request_json, headers={\"Content-Type\": \"application/json\"})\n",
162
+ "\n",
163
+ " end_time = time.time()\n",
164
+ " time_taken = end_time - start_time\n",
165
+ "\n",
166
+ " # Append the time taken to the list in a thread-safe manner\n",
167
+ " with lock:\n",
168
+ " times.append(time_taken)\n",
169
+ "\n",
170
+ "# Number of concurrent requests\n",
171
+ "num_requests = 100\n",
172
+ "\n",
173
+ "# Create threads\n",
174
+ "threads = []\n",
175
+ "for _ in range(num_requests):\n",
176
+ " thread = threading.Thread(target=send_request)\n",
177
+ " threads.append(thread)\n",
178
+ "\n",
179
+ "# Start threads\n",
180
+ "for thread in threads:\n",
181
+ " thread.start()\n",
182
+ "\n",
183
+ "# Wait for all threads to complete\n",
184
+ "for thread in threads:\n",
185
+ " thread.join()\n",
186
+ "\n",
187
+ "# Create a pandas DataFrame with the times\n",
188
+ "df = pd.DataFrame(times, columns=[\"Time Taken\"])\n",
189
+ "\n",
190
+ "# Print summary statistics (describe) of the time distribution\n",
191
+ "print(df.describe())\n"
192
+ ]
193
+ },
194
+ {
195
+ "cell_type": "markdown",
196
+ "metadata": {},
197
+ "source": [
198
+ "## Plot the distribution of times"
199
+ ]
200
+ },
201
+ {
202
+ "cell_type": "code",
203
+ "execution_count": 56,
204
+ "metadata": {},
205
+ "outputs": [
206
+ {
207
+ "data": {
208
+ "text/plain": [
209
+ "0 0.020945\n",
210
+ "1 0.015174\n",
211
+ "2 0.008024\n",
212
+ "3 0.006214\n",
213
+ "4 0.012195\n",
214
+ " ... \n",
215
+ "95 0.011356\n",
216
+ "96 0.012191\n",
217
+ "97 0.013172\n",
218
+ "98 0.014342\n",
219
+ "99 0.016271\n",
220
+ "Name: Time Taken, Length: 100, dtype: float64"
221
+ ]
222
+ },
223
+ "execution_count": 56,
224
+ "metadata": {},
225
+ "output_type": "execute_result"
226
+ }
227
+ ],
228
+ "source": [
229
+ "df[\"Time Taken\"]"
230
+ ]
231
+ },
232
+ {
233
+ "cell_type": "code",
234
+ "execution_count": 57,
235
+ "metadata": {},
236
+ "outputs": [
237
+ {
238
+ "data": {
239
+ "image/png": "",
240
+ "text/plain": [
241
+ "<Figure size 1000x600 with 1 Axes>"
242
+ ]
243
+ },
244
+ "metadata": {},
245
+ "output_type": "display_data"
246
+ },
247
+ {
248
+ "data": {
249
+ "image/png": "",
250
+ "text/plain": [
251
+ "<Figure size 1000x600 with 1 Axes>"
252
+ ]
253
+ },
254
+ "metadata": {},
255
+ "output_type": "display_data"
256
+ }
257
+ ],
258
+ "source": [
259
+ "import matplotlib.pyplot as plt\n",
260
+ "import seaborn as sns\n",
261
+ "# Plot the times taken\n",
262
+ "plt.figure(figsize=(10, 6))\n",
263
+ "plt.hist(times, bins=30, edgecolor='black')\n",
264
+ "plt.title(\"Distribution of Times Taken for NER API Requests on PLOD-CW Tuned BERT Model\")\n",
265
+ "plt.xlabel(\"Time Taken (seconds)\")\n",
266
+ "plt.ylabel(\"Frequency\")\n",
267
+ "plt.show()\n",
268
+ "\n",
269
+ "# Plot the KDE distribution and histogram of the times taken\n",
270
+ "# Plot the histogram and KDE distribution of the times taken\n",
271
+ "plt.figure(figsize=(10, 6))\n",
272
+ "sns.histplot(df[\"Time Taken\"], kde=True, bins=30, color='skyblue', stat='density', edgecolor='black')\n",
273
+ "plt.title(\"Distribution of Times Taken for API Requests on PLOD-CW Tuned BERT Model\")\n",
274
+ "plt.xlabel(\"Time Taken (seconds)\")\n",
275
+ "plt.ylabel(\"Density\")\n",
276
+ "plt.show()"
277
+ ]
278
+ }
279
+ ],
280
+ "metadata": {
281
+ "kernelspec": {
282
+ "display_name": "base",
283
+ "language": "python",
284
+ "name": "python3"
285
+ },
286
+ "language_info": {
287
+ "codemirror_mode": {
288
+ "name": "ipython",
289
+ "version": 3
290
+ },
291
+ "file_extension": ".py",
292
+ "mimetype": "text/x-python",
293
+ "name": "python",
294
+ "nbconvert_exporter": "python",
295
+ "pygments_lexer": "ipython3",
296
+ "version": "3.11.5"
297
+ }
298
+ },
299
+ "nbformat": 4,
300
+ "nbformat_minor": 2
301
+ }