#!/usr/bin/env python
# coding: utf-8
# In[52]:
# !pip install -q pymilvus towhee gradio
# In[53]:
#!curl -L https://github.com/pankajkishore/Cognitive-Project/blob/master/latest_ticket_data.csv -O
# In[1]:
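# Load the ticket dataset; the CSV is expected to contain at least 'id', 'description' and 'category' columns.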
import pandas as pd
df = pd.read_csv('latest_ticket_data.csv')
df.head()
# In[2]:
df.shape
# In[3]:
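# Compute a rough length for each description (min of word and character counts), skipping non-string values.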
df['length'] = df['description'].apply(
    lambda row: min(len(row.split(" ")), len(row)) if isinstance(row, str) else None
)
df['length'].max()
# In[4]:
df.description[14]
# In[5]:
df.shape
# In[6]:
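# Build id -> category and id -> description lookup dicts so search results (which return ids) can be mapped back to readable fields.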
id_category = df.set_index('id')['category'].to_dict()
# In[7]:
id_description = df.set_index('id')['description'].to_dict()
# In[8]:
id_description[12]
# In[9]:
id_category[10]
# In[11]:
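# Start the embedded Milvus server shipped with the milvus package (Milvus Lite); it listens locally (see default_server.listen_port below).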
from milvus import default_server
from pymilvus import connections, utility
default_server.start()
# In[12]:
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
# In[24]:
# Milvus connection parameters
connections.connect(host='127.0.0.1', port='19530')
# In[25]:
default_server.listen_port
# In[17]:
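# Helper that (re)creates a Milvus collection with a VARCHAR primary key and a FLOAT_VECTOR field of the given dimension, indexed with IVF_FLAT over L2 distance.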
def create_milvus_collection(collection_name, dim):
    connections.connect(host='127.0.0.1', port='19530')

    if utility.has_collection(collection_name):
        utility.drop_collection(collection_name)

    fields = [
        FieldSchema(name='id', dtype=DataType.VARCHAR, description='ids', max_length=500, is_primary=True, auto_id=False),
        FieldSchema(name='embedding', dtype=DataType.FLOAT_VECTOR, description='embedding vectors', dim=dim)
    ]
    schema = CollectionSchema(fields=fields, description='reverse text search')
    collection = Collection(name=collection_name, schema=schema)

    # create IVF_FLAT index for the embedding field
    index_params = {
        'metric_type': 'L2',
        'index_type': "IVF_FLAT",
        'params': {"nlist": 2048}
    }
    collection.create_index(field_name="embedding", index_params=index_params)
    return collection
# In[18]:
collection = create_milvus_collection('latest_ticket_data', 768)
# In[19]:
collection.load()
# In[26]:
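# Towhee insert pipeline: embed each description with the DPR context encoder (768-dim), L2-normalize the vector, and insert (id, vector) into the 'latest_ticket_data' collection.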
from towhee import pipe, ops
import numpy as np
from towhee.datacollection import DataCollection
insert_pipe = (
    pipe.input('id', 'description', 'category')
        .map('description', 'vec', ops.text_embedding.dpr(model_name='facebook/dpr-ctx_encoder-single-nq-base'))
        .map('vec', 'vec', lambda x: x / np.linalg.norm(x, axis=0))
        .map(('id', 'vec'), 'insert_status', ops.ann_insert.milvus_client(host='127.0.0.1',
                                                                          port='19530',
                                                                          collection_name='latest_ticket_data'))
        .output()
)
# In[ ]:
# Note: descriptions longer than the DPR/BERT 512-token position-embedding window fail during embedding with an error like:
#   File "/Users/www.abcom.in/Documents/milvus/.milvusenv/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py", line 238, in forward
#     embeddings += position_embeddings
#   RuntimeError: The size of tensor a (1002) must match the size of tensor b (512) at non-singleton dimension 1
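# A minimal workaround sketch (an assumption, not part of the original run): truncate long
# descriptions to a conservative word budget before the embedding stage, since words usually
# expand into even more sub-word tokens. The helper name and the 200-word budget below are
# hypothetical choices for illustration.
def truncate_description(text, max_words=200):
    # Keep only the first max_words whitespace-separated tokens of a description.
    if not isinstance(text, str):
        return text
    return " ".join(text.split()[:max_words])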
# In[27]:
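# Stream the CSV into the insert pipeline row by row; next(reader) skips the header row.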
import csv
with open('latest_ticket_data.csv', encoding='utf-8') as f:
    reader = csv.reader(f)
    next(reader)
    for row in reader:
        insert_pipe(*row)
# In[28]:
collection.load()
# In[29]:
print('Total number of inserted data is {}.'.format(collection.num_entities))
# In[30]:
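# Search pipeline: embed the query text the same way, retrieve the single nearest ticket from Milvus, and map the returned id back to its category.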
ans_pipe = (
    pipe.input('description')
        .map('description', 'vec', ops.text_embedding.dpr(model_name="facebook/dpr-ctx_encoder-single-nq-base"))
        .map('vec', 'vec', lambda x: x / np.linalg.norm(x, axis=0))
        .map('vec', 'res', ops.ann_search.milvus_client(host='127.0.0.1',
                                                        port='19530',
                                                        collection_name='latest_ticket_data',
                                                        limit=1))
        .map('res', 'category', lambda x: [id_category[int(i[0])] for i in x])
        .output('description', 'category')
)
# In[31]:
ans = ans_pipe('report hi please attached report user take appropriate actions order agent her computer')
# In[32]:
ans = DataCollection(ans)
ans.show()
# In[33]:
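# Gradio chat handler: rebuilds the search pipeline on each call and appends the predicted category for the incoming message to the chat history.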
import towhee
def chat(message, history):
    history = history or []
    ans_pipe = (
        pipe.input('description')
            .map('description', 'vec', ops.text_embedding.dpr(model_name="facebook/dpr-ctx_encoder-single-nq-base"))
            .map('vec', 'vec', lambda x: x / np.linalg.norm(x, axis=0))
            .map('vec', 'res', ops.ann_search.milvus_client(host='127.0.0.1', port='19530', collection_name='latest_ticket_data', limit=1))
            .map('res', 'category', lambda x: [id_category[int(i[0])] for i in x])
            .output('description', 'category')
    )
    response = ans_pipe(message).get()[1][0]
    history.append((message, response))
    return history, history
# In[34]:
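# Launch a simple Gradio chatbot UI around chat(). Note: color_map and allow_screenshot are arguments accepted by older Gradio releases and may need to be dropped on newer versions.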
import gradio
collection.load()
chatbot = gradio.Chatbot(color_map=("green", "gray"))
interface = gradio.Interface(
    chat,
    ["text", "state"],
    [chatbot, "state"],
    allow_screenshot=False,
    allow_flagging="never",
)
interface.launch(inline=True, share=True)