chandrakalagowda committed on
Commit
ee5a6c6
1 Parent(s): 05c0c4d

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +3 -9
  2. latest_ticket_data.csv +0 -0
  3. requirements.txt +148 -0
  4. tickets1.py +276 -0
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: Textsearchgr
3
- emoji: 👀
4
- colorFrom: indigo
5
- colorTo: gray
6
  sdk: gradio
7
- sdk_version: 3.39.0
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: textsearchgr
3
+ app_file: tickets1.py
 
 
4
  sdk: gradio
5
+ sdk_version: 3.34.0
 
 
6
  ---
 
 
latest_ticket_data.csv ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.1.0
2
+ aiohttp==3.8.4
3
+ aiosignal==1.3.1
4
+ altair==5.0.1
5
+ anyio==3.7.1
6
+ appnope==0.1.3
7
+ asttokens==2.2.1
8
+ async-timeout==4.0.2
9
+ attrs==23.1.0
10
+ av==10.0.0
11
+ backcall==0.2.0
12
+ beautifulsoup4==4.12.2
13
+ bleach==6.0.0
14
+ certifi==2023.5.7
15
+ charset-normalizer==3.2.0
16
+ click==8.1.6
17
+ comm==0.1.3
18
+ contourpy==1.1.0
19
+ cycler==0.11.0
20
+ debugpy==1.6.7
21
+ decorator==5.1.1
22
+ docutils==0.20.1
23
+ environs==9.5.0
24
+ executing==1.2.0
25
+ fastapi==0.100.0
26
+ ffmpy==0.3.1
27
+ filelock==3.12.2
28
+ fonttools==4.41.0
29
+ frozenlist==1.4.0
30
+ fsspec==2023.6.0
31
+ fvcore==0.1.5.post20221221
32
+ gitdb==4.0.10
33
+ GitPython==3.1.32
34
+ gradio==3.37.0
35
+ gradio_client==0.2.10
36
+ grpcio==1.53.0
37
+ h11==0.14.0
38
+ httpcore==0.17.3
39
+ httpx==0.24.1
40
+ huggingface-hub==0.16.4
41
+ idna==3.4
42
+ importlib-metadata==6.8.0
43
+ iopath==0.1.10
44
+ ipykernel==6.24.0
45
+ ipython==8.14.0
46
+ ipywidgets==8.0.7
47
+ jaraco.classes==3.3.0
48
+ jedi==0.18.2
49
+ Jinja2==3.1.2
50
+ jsonschema==4.18.4
51
+ jsonschema-specifications==2023.7.1
52
+ jupyter_client==8.3.0
53
+ jupyter_core==5.3.1
54
+ jupyterlab-widgets==3.0.8
55
+ keyring==24.2.0
56
+ kiwisolver==1.4.4
57
+ linkify-it-py==2.0.2
58
+ markdown-it-py==2.2.0
59
+ MarkupSafe==2.1.3
60
+ marshmallow==3.19.0
61
+ matplotlib==3.7.2
62
+ matplotlib-inline==0.1.6
63
+ mdit-py-plugins==0.3.3
64
+ mdurl==0.1.2
65
+ milvus==2.2.11
66
+ more-itertools==9.1.0
67
+ mpmath==1.3.0
68
+ multidict==6.0.4
69
+ nest-asyncio==1.5.6
70
+ networkx==3.1
71
+ numpy==1.25.1
72
+ opencv-python==4.8.0.74
73
+ orjson==3.9.2
74
+ packaging==23.1
75
+ pandas==2.0.3
76
+ parameterized==0.9.0
77
+ parso==0.8.3
78
+ pexpect==4.8.0
79
+ pickleshare==0.7.5
80
+ Pillow==10.0.0
81
+ pkginfo==1.9.6
82
+ platformdirs==3.9.1
83
+ portalocker==2.7.0
84
+ prompt-toolkit==3.0.39
85
+ protobuf==4.23.4
86
+ psutil==5.9.5
87
+ ptyprocess==0.7.0
88
+ pure-eval==0.2.2
89
+ pydantic==1.10.11
90
+ pydub==0.25.1
91
+ Pygments==2.15.1
92
+ pymilvus==2.2.11
93
+ pyparsing==3.0.9
94
+ python-dateutil==2.8.2
95
+ python-dotenv==1.0.0
96
+ python-multipart==0.0.6
97
+ pytorchvideo==0.1.3
98
+ pytz==2023.3
99
+ PyYAML==6.0.1
100
+ pyzmq==25.1.0
101
+ readme-renderer==40.0
102
+ referencing==0.30.0
103
+ regex==2023.6.3
104
+ requests==2.31.0
105
+ requests-toolbelt==1.0.0
106
+ rfc3986==2.0.0
107
+ rich==13.4.2
108
+ rpds-py==0.9.2
109
+ safetensors==0.3.1
110
+ scipy==1.11.1
111
+ seaborn==0.12.2
112
+ semantic-version==2.10.0
113
+ six==1.16.0
114
+ smmap==5.0.0
115
+ sniffio==1.3.0
116
+ soupsieve==2.4.1
117
+ stack-data==0.6.2
118
+ starlette==0.27.0
119
+ sympy==1.12
120
+ tabulate==0.9.0
121
+ tenacity==8.2.2
122
+ termcolor==2.3.0
123
+ timm==0.9.2
124
+ tokenizers==0.13.3
125
+ toolz==0.12.0
126
+ torch==2.0.1
127
+ torchvision==0.15.2
128
+ tornado==6.3.2
129
+ towhee==1.1.1
130
+ towhee.models==1.1.1
131
+ tqdm==4.65.0
132
+ traitlets==5.9.0
133
+ transformers==4.31.0
134
+ twine==4.0.2
135
+ typing_extensions==4.7.1
136
+ tzdata==2023.3
137
+ uc-micro-py==1.0.2
138
+ ujson==5.8.0
139
+ ultralytics==8.0.138
140
+ urllib3==2.0.3
141
+ uvicorn==0.23.1
142
+ wcwidth==0.2.6
143
+ webencodings==0.5.1
144
+ websockets==11.0.3
145
+ widgetsnbextension==4.0.8
146
+ yacs==0.1.8
147
+ yarl==1.9.2
148
+ zipp==3.16.2
tickets1.py ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+
4
+ # In[52]:
5
+
6
+
7
+ # !pip install -q milvus pymilvus towhee gradio
8
+
9
+
10
+ # In[53]:
11
+
12
+
13
+ #!curl -L https://github.com/pankajkishore/Cognitive-Project/raw/master/latest_ticket_data.csv -O
14
+
15
+
16
+ # In[1]:
17
+
18
+
19
import pandas as pd


def _approx_length(text):
    """Return the smaller of the space-split token count and the character count.

    Non-string cells (e.g. missing values) yield ``None``.
    """
    if not isinstance(text, str):
        return None
    return min(len(text.split(" ")), len(text))


# Load the ticket dump; the expressions below are notebook-style inspections
# kept from the original export (they have no effect when run as a script).
df = pd.read_csv('latest_ticket_data.csv')
df.head()
df.shape

# Rough per-ticket size of the description text.
df['length'] = df['description'].apply(_approx_length)
df['length'].max()

df.description[14]
df.shape

# Lookup tables keyed by ticket id, used later to translate search hits.
_tickets_by_id = df.set_index('id')
id_category = _tickets_by_id['category'].to_dict()
id_description = _tickets_by_id['description'].to_dict()

id_description[12]
id_category[10]
73
+
74
+
75
+ # In[11]:
76
+
77
+
78
from milvus import default_server
from pymilvus import (
    Collection,
    CollectionSchema,
    DataType,
    FieldSchema,
    connections,
    utility,
)

# Boot the embedded Milvus server before any client connects to it.
default_server.start()

# Milvus parameters
# NOTE(review): the port is hard-coded here and in every pipeline below; it
# presumably has to equal default_server.listen_port -- confirm on each host.
connections.connect(host='127.0.0.1', port='19531')

# Notebook-style inspection of the port the embedded server listens on.
default_server.listen_port
100
+
101
+
102
+ # In[17]:
103
+
104
+
105
def create_milvus_collection(collection_name, dim, host='127.0.0.1', port='19531'):
    """Create a fresh Milvus collection with an IVF_FLAT index on its vectors.

    If a collection with the same name already exists it is dropped first,
    so calling this function discards any previously inserted data.

    Args:
        collection_name: Name of the collection to (re)create.
        dim: Dimensionality of the ``embedding`` float-vector field.
        host: Milvus server host. Defaults to the address used by the rest
            of this script.
        port: Milvus server port. Defaults to the port used by the rest of
            this script.

    Returns:
        The newly created collection, with an index on ``embedding``.
    """
    connections.connect(host=host, port=port)
    if utility.has_collection(collection_name):
        utility.drop_collection(collection_name)

    # The keyword is ``description``; the former ``descrition`` spelling was
    # not a recognised FieldSchema argument, so the text never reached the schema.
    fields = [
        FieldSchema(
            name='id',
            dtype=DataType.VARCHAR,
            description='ids',
            max_length=500,
            is_primary=True,
            auto_id=False,
        ),
        FieldSchema(
            name='embedding',
            dtype=DataType.FLOAT_VECTOR,
            description='embedding vectors',
            dim=dim,
        ),
    ]
    schema = CollectionSchema(fields=fields, description='reverse text search')
    collection = Collection(name=collection_name, schema=schema)

    # Create an IVF_FLAT index (L2 metric) on the embedding field.
    index_params = {
        'metric_type': 'L2',
        'index_type': 'IVF_FLAT',
        'params': {'nlist': 2048},
    }
    collection.create_index(field_name='embedding', index_params=index_params)
    return collection
125
+
126
+
127
+ # In[18]:
128
+
129
+
130
# (Re)create the collection; the dimension presumably has to match the size
# of the vectors produced by the DPR encoder below -- TODO confirm.
collection = create_milvus_collection('latest_ticket_data', 768)
collection.load()

from towhee import pipe, ops
import numpy as np
from towhee.datacollection import DataCollection

_insert_encoder = ops.text_embedding.dpr(model_name='facebook/dpr-ctx_encoder-single-nq-base')
_insert_client = ops.ann_insert.milvus_client(
    host='127.0.0.1',
    port='19531',
    collection_name='latest_ticket_data',
)


def _l2_normalize(vec):
    """Scale ``vec`` by its L2 norm computed along axis 0."""
    return vec / np.linalg.norm(vec, axis=0)


# Insert-side pipeline: embed the description, normalise the vector and
# store it in Milvus under the ticket id. 'category' is accepted as an
# input column but not used by any stage.
insert_pipe = (
    pipe.input('id', 'description', 'category')
    .map('description', 'vec', _insert_encoder)
    .map('vec', 'vec', _l2_normalize)
    .map(('id', 'vec'), 'insert_status', _insert_client)
    .output()
)
155
+
156
+
157
+ # In[ ]:
158
+
159
+
160
+ # File "/Users/www.abcom.in/Documents/milvus/.milvusenv/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py", line 238, in forward
161
+ # embeddings += position_embeddings
162
+ # RuntimeError: The size of tensor a (1002) must match the size of tensor b (512) at non-singleton dimension 1
163
+
164
+
165
+ # In[27]:
166
+
167
+
168
import csv

# Feed every CSV row through the insert pipeline.
# newline='' is what the csv module documentation asks for, so that line
# breaks embedded in quoted fields are interpreted correctly.
with open('latest_ticket_data.csv', encoding='utf-8', newline='') as f:
    reader = csv.reader(f)
    # Skip the header row; the default keeps an empty file from raising.
    next(reader, None)
    for row in reader:
        if not row:
            # csv.reader yields an empty list for blank lines; there is
            # nothing to unpack into the pipeline for those.
            continue
        insert_pipe(*row)

# Seal the inserted rows before counting; per the pymilvus documentation
# num_entities is only reliable after a flush.
collection.flush()
collection.load()

print('Total number of inserted data is {}.'.format(collection.num_entities))
186
+
187
+
188
+ # In[30]:
189
+
190
+
191
_search_encoder = ops.text_embedding.dpr(model_name="facebook/dpr-ctx_encoder-single-nq-base")
_search_client = ops.ann_search.milvus_client(
    host='127.0.0.1',
    port='19531',
    collection_name='latest_ticket_data',
    limit=1,
)


def _to_unit_vector(vec):
    """Scale ``vec`` by its L2 norm computed along axis 0."""
    return vec / np.linalg.norm(vec, axis=0)


def _lookup_categories(hits):
    """Translate search hits into categories, using hit[0] as the ticket id."""
    return [id_category[int(hit[0])] for hit in hits]


# Search-side pipeline: embed the query text, normalise it, fetch the single
# nearest stored vector and map its id to a ticket category.
ans_pipe = (
    pipe.input('description')
    .map('description', 'vec', _search_encoder)
    .map('vec', 'vec', _to_unit_vector)
    .map('vec', 'res', _search_client)
    .map('res', 'category', _lookup_categories)
    .output('description', 'category')
)

# Smoke-test the pipeline with one sample ticket and display the result.
ans = ans_pipe('report hi please attached report user take appropriate actions order agent her computer')

ans = DataCollection(ans)
ans.show()
215
+
216
+
217
+ # In[33]:
218
+
219
+
220
+ import towhee
221
def chat(message, history):
    """Answer one chat message with the category of the closest stored ticket.

    Args:
        message: Free-text ticket description typed by the user.
        history: List of ``(message, response)`` tuples from earlier turns,
            or a falsy value on the first turn.

    Returns:
        The updated history twice, matching the two outputs the function is
        wired to in the interface.
    """
    history = history or []

    # Reuse the module-level ``ans_pipe`` rather than rebuilding an identical
    # pipeline (encoder and Milvus client included) on every message.
    categories = ans_pipe(message).get()[1]

    # The search can return no hit (e.g. an empty collection); answer with a
    # readable message instead of failing with IndexError.
    response = categories[0] if categories else 'No matching category found.'
    history.append((message, response))
    return history, history
235
+
236
+
237
+ # In[34]:
238
+
239
+
240
+ import gradio
241
+
242
# Make sure the collection is loaded before the UI starts serving searches.
collection.load()

# ``color_map`` (Chatbot) and ``allow_screenshot`` (Interface) are deprecated
# in gradio 3.x and only produced warnings, so they are no longer passed.
chatbot = gradio.Chatbot()
interface = gradio.Interface(
    chat,
    ["text", "state"],   # user message + per-session chat history
    [chatbot, "state"],  # rendered conversation + updated history
    allow_flagging="never",
)
interface.launch(inline=True, share=True)
252
+
253
+
254
+ # In[ ]:
255
+
256
+
257
+
258
+
259
+
260
+ # In[ ]:
261
+
262
+
263
+
264
+
265
+
266
+ # In[ ]:
267
+
268
+
269
+
270
+
271
+
272
+ # In[ ]:
273
+
274
+
275
+
276
+