Spaces:

chandrakalagowda
/

textsearchgr

Runtime error

App Files Files Community

chandrakalagowda committed on Aug 7, 2023

Commit

ee5a6c6

1 Parent(s): 05c0c4d

Upload folder using huggingface_hub

Browse files

Files changed (4) hide show

README.md +3 -9
latest_ticket_data.csv +0 -0
requirements.txt +148 -0
tickets1.py +276 -0

README.md CHANGED Viewed

@@ -1,12 +1,6 @@
 ---
-title: Textsearchgr
-emoji: 👀
-colorFrom: indigo
-colorTo: gray
 sdk: gradio
-sdk_version: 3.39.0
-app_file: app.py
-pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: textsearchgr
+app_file: tickets1.ipynb
 sdk: gradio
+sdk_version: 3.34.0
 ---

latest_ticket_data.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt ADDED Viewed

	@@ -0,0 +1,148 @@

+aiofiles==23.1.0
+aiohttp==3.8.4
+aiosignal==1.3.1
+altair==5.0.1
+anyio==3.7.1
+appnope==0.1.3
+asttokens==2.2.1
+async-timeout==4.0.2
+attrs==23.1.0
+av==10.0.0
+backcall==0.2.0
+beautifulsoup4==4.12.2
+bleach==6.0.0
+certifi==2023.5.7
+charset-normalizer==3.2.0
+click==8.1.6
+comm==0.1.3
+contourpy==1.1.0
+cycler==0.11.0
+debugpy==1.6.7
+decorator==5.1.1
+docutils==0.20.1
+environs==9.5.0
+executing==1.2.0
+fastapi==0.100.0
+ffmpy==0.3.1
+filelock==3.12.2
+fonttools==4.41.0
+frozenlist==1.4.0
+fsspec==2023.6.0
+fvcore==0.1.5.post20221221
+gitdb==4.0.10
+GitPython==3.1.32
+gradio==3.37.0
+gradio_client==0.2.10
+grpcio==1.53.0
+h11==0.14.0
+httpcore==0.17.3
+httpx==0.24.1
+huggingface-hub==0.16.4
+idna==3.4
+importlib-metadata==6.8.0
+iopath==0.1.10
+ipykernel==6.24.0
+ipython==8.14.0
+ipywidgets==8.0.7
+jaraco.classes==3.3.0
+jedi==0.18.2
+Jinja2==3.1.2
+jsonschema==4.18.4
+jsonschema-specifications==2023.7.1
+jupyter_client==8.3.0
+jupyter_core==5.3.1
+jupyterlab-widgets==3.0.8
+keyring==24.2.0
+kiwisolver==1.4.4
+linkify-it-py==2.0.2
+markdown-it-py==2.2.0
+MarkupSafe==2.1.3
+marshmallow==3.19.0
+matplotlib==3.7.2
+matplotlib-inline==0.1.6
+mdit-py-plugins==0.3.3
+mdurl==0.1.2
+milvus==2.2.11
+more-itertools==9.1.0
+mpmath==1.3.0
+multidict==6.0.4
+nest-asyncio==1.5.6
+networkx==3.1
+numpy==1.25.1
+opencv-python==4.8.0.74
+orjson==3.9.2
+packaging==23.1
+pandas==2.0.3
+parameterized==0.9.0
+parso==0.8.3
+pexpect==4.8.0
+pickleshare==0.7.5
+Pillow==10.0.0
+pkginfo==1.9.6
+platformdirs==3.9.1
+portalocker==2.7.0
+prompt-toolkit==3.0.39
+protobuf==4.23.4
+psutil==5.9.5
+ptyprocess==0.7.0
+pure-eval==0.2.2
+pydantic==1.10.11
+pydub==0.25.1
+Pygments==2.15.1
+pymilvus==2.2.11
+pyparsing==3.0.9
+python-dateutil==2.8.2
+python-dotenv==1.0.0
+python-multipart==0.0.6
+pytorchvideo==0.1.3
+pytz==2023.3
+PyYAML==6.0.1
+pyzmq==25.1.0
+readme-renderer==40.0
+referencing==0.30.0
+regex==2023.6.3
+requests==2.31.0
+requests-toolbelt==1.0.0
+rfc3986==2.0.0
+rich==13.4.2
+rpds-py==0.9.2
+safetensors==0.3.1
+scipy==1.11.1
+seaborn==0.12.2
+semantic-version==2.10.0
+six==1.16.0
+smmap==5.0.0
+sniffio==1.3.0
+soupsieve==2.4.1
+stack-data==0.6.2
+starlette==0.27.0
+sympy==1.12
+tabulate==0.9.0
+tenacity==8.2.2
+termcolor==2.3.0
+timm==0.9.2
+tokenizers==0.13.3
+toolz==0.12.0
+torch==2.0.1
+torchvision==0.15.2
+tornado==6.3.2
+towhee==1.1.1
+towhee.models==1.1.1
+tqdm==4.65.0
+traitlets==5.9.0
+transformers==4.31.0
+twine==4.0.2
+typing_extensions==4.7.1
+tzdata==2023.3
+uc-micro-py==1.0.2
+ujson==5.8.0
+ultralytics==8.0.138
+urllib3==2.0.3
+uvicorn==0.23.1
+wcwidth==0.2.6
+webencodings==0.5.1
+websockets==11.0.3
+widgetsnbextension==4.0.8
+yacs==0.1.8
+yarl==1.9.2
+zipp==3.16.2

tickets1.py ADDED Viewed

	@@ -0,0 +1,276 @@

+#!/usr/bin/env python
+# coding: utf-8
+# In[52]: notebook-only dependency install, kept commented out in the exported script
+# !pip install -q pymilvus towhee gradio
+# In[53]: notebook-only dataset download. NOTE(review): this is a GitHub "blob" page URL; presumably the raw CSV URL was intended - confirm
+#!curl -L https://github.com/pankajkishore/Cognitive-Project/blob/master/latest_ticket_data.csv -O
+# In[1]: load the ticket CSV into a DataFrame; bare expressions such as df.head() only display output inside a notebook
+import pandas as pd
+df = pd.read_csv('latest_ticket_data.csv')
+df.head()
+# In[2]: inspect the (rows, columns) shape
+df.shape
+# In[3]: add a 'length' column: min(space-separated token count, character count) for string descriptions, None otherwise
+df['length'] = df['description'].apply(
+    lambda row: min(len(row.split(" ")), len(row)) if isinstance(row, str) else None
+)
+df['length'].max()
+# In[4]: look at one sample description
+df.description[14]
+# In[5]: shape again, now including the 'length' column
+df.shape
+# In[6]: lookup table: ticket id -> category (used by the search pipeline to turn hit ids into categories)
+id_category = df.set_index('id')['category'].to_dict()
+# In[7]: lookup table: ticket id -> description
+id_description = df.set_index('id')['description'].to_dict()
+# In[8]: spot-check one description
+id_description[12]
+# In[9]: spot-check one category
+id_category[10]
+# In[11]: start the embedded Milvus server provided by the `milvus` package
+from milvus import default_server
+from pymilvus import connections, utility
+default_server.start()
+# In[12]: pymilvus names used below to define the collection schema
+from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
+# In[24]: connect to Milvus. NOTE(review): the port is hard-coded to '19531' while the next cell inspects default_server.listen_port - confirm the two agree
+# Milvus parameters
+connections.connect(host='127.0.0.1', port='19531')
+# In[25]: port the embedded server is actually listening on
+default_server.listen_port
+def create_milvus_collection(collection_name, dim):
+    connections.connect(host='127.0.0.1', port='19531')
+    if utility.has_collection(collection_name):
+        utility.drop_collection(collection_name)
+    fields = [
+    FieldSchema(name='id', dtype=DataType.VARCHAR, descrition='ids', max_length=500, is_primary=True, auto_id=False),
+    FieldSchema(name='embedding', dtype=DataType.FLOAT_VECTOR, descrition='embedding vectors', dim=dim)
+    ]
+    schema = CollectionSchema(fields=fields, description='reverse text search')
+    collection = Collection(name=collection_name, schema=schema)
+    # create IVF_FLAT index for collection.
+    index_params = {
+        'metric_type':'L2',
+        'index_type':"IVF_FLAT",
+        'params':{"nlist":2048}
+    }
+    collection.create_index(field_name="embedding", index_params=index_params)
+    return collection
+# In[18]: (re)create the 'latest_ticket_data' collection with 768-dimensional vectors (presumably the DPR embedding size - confirm)
+collection = create_milvus_collection('latest_ticket_data', 768)
+# In[19]: load the collection
+collection.load()
+# In[26]: insert pipeline: embed each description with the DPR context encoder, divide by its norm, insert (id, vec) into Milvus
+from towhee import pipe, ops
+import numpy as np
+from towhee.datacollection import DataCollection
+insert_pipe = (
+    pipe.input('id', 'description', 'category')
+        .map('description', 'vec', ops.text_embedding.dpr(model_name='facebook/dpr-ctx_encoder-single-nq-base'))
+        .map('vec', 'vec', lambda x: x / np.linalg.norm(x, axis=0))
+        .map(('id', 'vec'), 'insert_status', ops.ann_insert.milvus_client(host='127.0.0.1',
+                                                                          port='19531',
+                                                                          collection_name='latest_ticket_data'))
+        .output()
+)
+# In[ ]: traceback kept from an earlier run. NOTE(review): looks like an input of 1002 tokens exceeding the model's 512 positions - confirm whether long descriptions need truncating
+#  File "/Users/www.abcom.in/Documents/milvus/.milvusenv/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py", line 238, in forward
+#     embeddings += position_embeddings
+# RuntimeError: The size of tensor a (1002) must match the size of tensor b (512) at non-singleton dimension 1
+# In[27]: skip the CSV header and push every row through insert_pipe; assumes each row is exactly (id, description, category) in that order - TODO confirm
+import csv
+with open('latest_ticket_data.csv', encoding='utf-8') as f:
+    reader = csv.reader(f)
+    next(reader)
+    for row in reader:
+        insert_pipe(*row)
+# In[28]: load the collection again after the inserts
+collection.load()
+# In[29]: report how many entities the collection holds
+print('Total number of inserted data is {}.'.format(collection.num_entities))
+# In[30]: search pipeline: same embedding and normalisation as insert_pipe, top-1 Milvus search, then map each hit id to its category via id_category
+ans_pipe = (
+    pipe.input('description')
+        .map('description', 'vec', ops.text_embedding.dpr(model_name="facebook/dpr-ctx_encoder-single-nq-base"))
+        .map('vec', 'vec', lambda x: x / np.linalg.norm(x, axis=0))
+        .map('vec', 'res', ops.ann_search.milvus_client(host='127.0.0.1',
+                                                        port='19531',
+                                                        collection_name='latest_ticket_data',
+                                                        limit=1))
+        .map('res', 'category', lambda x: [id_category[int(i[0])] for i in x])
+        .output('description', 'category')
+)
+# In[31]: run one sample query through the search pipeline
+ans = ans_pipe('report hi please attached report user take appropriate actions order agent her computer')
+# In[32]: wrap the result in a DataCollection and display it
+ans = DataCollection(ans)
+ans.show()
+# In[33]:
+import towhee
+def chat(message, history):
+    history = history or []
+    ans_pipe = (
+        pipe.input('description')
+            .map('description', 'vec', ops.text_embedding.dpr(model_name="facebook/dpr-ctx_encoder-single-nq-base"))
+            .map('vec', 'vec', lambda x: x / np.linalg.norm(x, axis=0))
+            .map('vec', 'res', ops.ann_search.milvus_client(host='127.0.0.1', port='19531', collection_name='latest_ticket_data', limit=1))
+            .map('res', 'category', lambda x: [id_category[int(i[0])] for i in x])
+            .output('description', 'category')
+    )
+    response = ans_pipe(message).get()[1][0]
+    history.append((message, response))
+    return history, history
+# In[34]: build and launch the Gradio chat UI around chat(); inputs are the text box and session state, outputs are the chatbot and session state
+import gradio
+collection.load()
+chatbot = gradio.Chatbot(color_map=("green", "gray"))  # NOTE(review): color_map looks deprecated in Gradio 3.x - verify against the pinned version
+interface = gradio.Interface(
+    chat,
+    ["text", "state"],
+    [chatbot, "state"],
+    allow_screenshot=False,  # NOTE(review): presumably a deprecated option in Gradio 3.x - confirm
+    allow_flagging="never",
+)
+interface.launch(inline=True, share=True)
+# In[ ]:
+# In[ ]:
+# In[ ]:
+# In[ ]: