#!/usr/bin/env python
# coding: utf-8
# In[52]:
# !pip install -q pymilvus towhee gradio
# In[53]:
#!curl -L https://github.com/pankajkishore/Cognitive-Project/blob/master/latest_ticket_data.csv -O
# In[1]:
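# Load the ticket dataset; the CSV is expected to contain at least 'id', 'description' and 'category' columns.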
import pandas as pd
df = pd.read_csv('latest_ticket_data.csv')
df.head()
# In[2]:
df.shape
# In[3]:
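# Compute a rough length for each description (min of word and character counts), skipping non-string values.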
df['length'] = df['description'].apply(
    lambda row: min(len(row.split(" ")), len(row)) if isinstance(row, str) else None
)
df['length'].max()
# In[4]:
df.description[14]
# In[5]:
df.shape
# In[6]:
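# Build id -> category and id -> description lookup dicts so search results (which return ids) can be mapped back to readable fields.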
id_category = df.set_index('id')['category'].to_dict()
# In[7]:
id_description = df.set_index('id')['description'].to_dict()
# In[8]:
id_description[12]
# In[9]:
id_category[10]
# In[11]:
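# Start the embedded Milvus server shipped with the milvus package (Milvus Lite); it listens locally (see default_server.listen_port below).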
from milvus import default_server
from pymilvus import connections, utility
default_server.start()
# In[12]:
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
# In[24]:
# Milvus connection parameters
connections.connect(host='127.0.0.1', port='19530')
# In[25]:
default_server.listen_port
# In[17]:
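# Helper that (re)creates a Milvus collection with a VARCHAR primary key and a FLOAT_VECTOR field of the given dimension, indexed with IVF_FLAT over L2 distance.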
def create_milvus_collection(collection_name, dim):
    connections.connect(host='127.0.0.1', port='19530')

    if utility.has_collection(collection_name):
        utility.drop_collection(collection_name)

    fields = [
        FieldSchema(name='id', dtype=DataType.VARCHAR, description='ids', max_length=500, is_primary=True, auto_id=False),
        FieldSchema(name='embedding', dtype=DataType.FLOAT_VECTOR, description='embedding vectors', dim=dim)
    ]
    schema = CollectionSchema(fields=fields, description='reverse text search')
    collection = Collection(name=collection_name, schema=schema)

    # create IVF_FLAT index for the embedding field
    index_params = {
        'metric_type': 'L2',
        'index_type': "IVF_FLAT",
        'params': {"nlist": 2048}
    }
    collection.create_index(field_name="embedding", index_params=index_params)
    return collection
# In[18]:
collection = create_milvus_collection('latest_ticket_data', 768)
# In[19]:
collection.load()
# In[26]:
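# Towhee insert pipeline: embed each description with the DPR context encoder (768-dim), L2-normalize the vector, and insert (id, vector) into the 'latest_ticket_data' collection.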
from towhee import pipe, ops
import numpy as np
from towhee.datacollection import DataCollection
insert_pipe = (
    pipe.input('id', 'description', 'category')
        .map('description', 'vec', ops.text_embedding.dpr(model_name='facebook/dpr-ctx_encoder-single-nq-base'))
        .map('vec', 'vec', lambda x: x / np.linalg.norm(x, axis=0))
        .map(('id', 'vec'), 'insert_status', ops.ann_insert.milvus_client(host='127.0.0.1',
                                                                          port='19530',
                                                                          collection_name='latest_ticket_data'))
        .output()
)
# In[ ]:
# Note: descriptions longer than the DPR/BERT 512-token position-embedding window fail during embedding with an error like:
#   File "/Users/www.abcom.in/Documents/milvus/.milvusenv/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py", line 238, in forward
#     embeddings += position_embeddings
#   RuntimeError: The size of tensor a (1002) must match the size of tensor b (512) at non-singleton dimension 1
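# A minimal workaround sketch (an assumption, not part of the original run): truncate long
# descriptions to a conservative word budget before the embedding stage, since words usually
# expand into even more sub-word tokens. The helper name and the 200-word budget below are
# hypothetical choices for illustration.
def truncate_description(text, max_words=200):
    # Keep only the first max_words whitespace-separated tokens of a description.
    if not isinstance(text, str):
        return text
    return " ".join(text.split()[:max_words])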
# In[27]:
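# Stream the CSV into the insert pipeline row by row; next(reader) skips the header row.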
import csv
with open('latest_ticket_data.csv', encoding='utf-8') as f:
    reader = csv.reader(f)
    next(reader)
    for row in reader:
        insert_pipe(*row)
# In[28]:
collection.load()
# In[29]:
print('Total number of inserted data is {}.'.format(collection.num_entities))
# In[30]:
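# Search pipeline: embed the query text the same way, retrieve the single nearest ticket from Milvus, and map the returned id back to its category.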
ans_pipe = (
    pipe.input('description')
        .map('description', 'vec', ops.text_embedding.dpr(model_name="facebook/dpr-ctx_encoder-single-nq-base"))
        .map('vec', 'vec', lambda x: x / np.linalg.norm(x, axis=0))
        .map('vec', 'res', ops.ann_search.milvus_client(host='127.0.0.1',
                                                        port='19530',
                                                        collection_name='latest_ticket_data',
                                                        limit=1))
        .map('res', 'category', lambda x: [id_category[int(i[0])] for i in x])
        .output('description', 'category')
)
# In[31]:
ans = ans_pipe('report hi please attached report user take appropriate actions order agent her computer')
# In[32]:
ans = DataCollection(ans)
ans.show()
# In[33]:
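# Gradio chat handler: rebuilds the search pipeline on each call and appends the predicted category for the incoming message to the chat history.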
import towhee
def chat(message, history):
    history = history or []
    ans_pipe = (
        pipe.input('description')
            .map('description', 'vec', ops.text_embedding.dpr(model_name="facebook/dpr-ctx_encoder-single-nq-base"))
            .map('vec', 'vec', lambda x: x / np.linalg.norm(x, axis=0))
            .map('vec', 'res', ops.ann_search.milvus_client(host='127.0.0.1', port='19530', collection_name='latest_ticket_data', limit=1))
            .map('res', 'category', lambda x: [id_category[int(i[0])] for i in x])
            .output('description', 'category')
    )
    response = ans_pipe(message).get()[1][0]
    history.append((message, response))
    return history, history
# In[34]:
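# Launch a simple Gradio chatbot UI around chat(). Note: color_map and allow_screenshot are arguments accepted by older Gradio releases and may need to be dropped on newer versions.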
import gradio
collection.load()
chatbot = gradio.Chatbot(color_map=("green", "gray"))
interface = gradio.Interface(
    chat,
    ["text", "state"],
    [chatbot, "state"],
    allow_screenshot=False,
    allow_flagging="never",
)
interface.launch(inline=True, share=True)