#!/usr/bin/env python
# coding: utf-8

# In[52]:

# !pip install -q pymilvus towhee gradio


# In[53]:

#!curl -L https://github.com/pankajkishore/Cognitive-Project/blob/master/latest_ticket_data.csv -O


# In[1]:

import pandas as pd

df = pd.read_csv('latest_ticket_data.csv')
df.head()


# In[2]:

df.shape


# In[3]:

# Rough length of each description (word count, capped at the character count; None for non-strings).
df['length'] = df['description'].apply(
    lambda row: min(len(row.split(" ")), len(row)) if isinstance(row, str) else None
)
df['length'].max()


# In[4]:

df.description[14]


# In[5]:

df.shape


# In[6]:

# Lookup tables from ticket id to category / description.
id_category = df.set_index('id')['category'].to_dict()


# In[7]:

id_description = df.set_index('id')['description'].to_dict()


# In[8]:

id_description[12]


# In[9]:

id_category[10]


# In[11]:

# Start the embedded Milvus server.
from milvus import default_server
from pymilvus import connections, utility

default_server.start()


# In[12]:

from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility


# In[24]:

# Milvus connection parameters
connections.connect(host='127.0.0.1', port='19530')


# In[25]:

default_server.listen_port


# In[17]:

def create_milvus_collection(collection_name, dim):
    """Create (or recreate) a Milvus collection with an id field and an embedding field."""
    connections.connect(host='127.0.0.1', port='19530')
    if utility.has_collection(collection_name):
        utility.drop_collection(collection_name)

    fields = [
        FieldSchema(name='id', dtype=DataType.VARCHAR, description='ids',
                    max_length=500, is_primary=True, auto_id=False),
        FieldSchema(name='embedding', dtype=DataType.FLOAT_VECTOR,
                    description='embedding vectors', dim=dim)
    ]
    schema = CollectionSchema(fields=fields, description='reverse text search')
    collection = Collection(name=collection_name, schema=schema)

    # Create an IVF_FLAT index on the embedding field.
    index_params = {
        'metric_type': 'L2',
        'index_type': 'IVF_FLAT',
        'params': {'nlist': 2048}
    }
    collection.create_index(field_name='embedding', index_params=index_params)
    return collection


# In[18]:

collection = create_milvus_collection('latest_ticket_data', 768)


# In[19]:

collection.load()


# In[26]:

from towhee import pipe, ops
import numpy as np
from towhee.datacollection import DataCollection

# Insertion pipeline: embed each description with DPR, normalize the vector, and insert it into Milvus.
insert_pipe = (
    pipe.input('id', 'description', 'category')
        .map('description', 'vec', ops.text_embedding.dpr(model_name='facebook/dpr-ctx_encoder-single-nq-base'))
        .map('vec', 'vec', lambda x: x / np.linalg.norm(x, axis=0))
        .map(('id', 'vec'), 'insert_status',
             ops.ann_insert.milvus_client(host='127.0.0.1', port='19530',
                                          collection_name='latest_ticket_data'))
        .output()
)


# In[ ]:

# Note: descriptions exceeding the DPR/BERT 512-token limit raised this error during insertion:
# File "/Users/www.abcom.in/Documents/milvus/.milvusenv/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py", line 238, in forward
#     embeddings += position_embeddings
# RuntimeError: The size of tensor a (1002) must match the size of tensor b (512) at non-singleton dimension 1


# In[27]:

import csv

with open('latest_ticket_data.csv', encoding='utf-8') as f:
    reader = csv.reader(f)
    next(reader)  # skip the header row
    for row in reader:
        insert_pipe(*row)


# In[28]:

collection.load()


# In[29]:

print('Total number of inserted data is {}.'.format(collection.num_entities))
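# In[ ]:

# Optional sanity check (not in the original notebook): query the collection
# directly with pymilvus to confirm the index is serving. The random query
# vector below is only a placeholder to exercise the index; real queries
# should use DPR embeddings, as in the search pipeline defined next.
sanity_vec = np.random.rand(768).astype('float32')
sanity_hits = collection.search(
    data=[sanity_vec.tolist()],
    anns_field='embedding',
    param={'metric_type': 'L2', 'params': {'nprobe': 16}},
    limit=3,
)
for hit in sanity_hits[0]:
    print(hit.id, hit.distance)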
# In[30]:

# Search pipeline: embed the query, retrieve the nearest ticket, and map its id back to a category.
ans_pipe = (
    pipe.input('description')
        .map('description', 'vec', ops.text_embedding.dpr(model_name='facebook/dpr-ctx_encoder-single-nq-base'))
        .map('vec', 'vec', lambda x: x / np.linalg.norm(x, axis=0))
        .map('vec', 'res', ops.ann_search.milvus_client(host='127.0.0.1', port='19530',
                                                        collection_name='latest_ticket_data', limit=1))
        .map('res', 'category', lambda x: [id_category[int(i[0])] for i in x])
        .output('description', 'category')
)


# In[31]:

ans = ans_pipe('report hi please attached report user take appropriate actions order agent her computer')


# In[32]:

ans = DataCollection(ans)
ans.show()


# In[33]:

import towhee

def chat(message, history):
    """Classify the incoming message with the search pipeline and append the result to the chat history."""
    history = history or []
    ans_pipe = (
        pipe.input('description')
            .map('description', 'vec', ops.text_embedding.dpr(model_name='facebook/dpr-ctx_encoder-single-nq-base'))
            .map('vec', 'vec', lambda x: x / np.linalg.norm(x, axis=0))
            .map('vec', 'res', ops.ann_search.milvus_client(host='127.0.0.1', port='19530',
                                                            collection_name='latest_ticket_data', limit=1))
            .map('res', 'category', lambda x: [id_category[int(i[0])] for i in x])
            .output('description', 'category')
    )
    response = ans_pipe(message).get()[1][0]
    history.append((message, response))
    return history, history


# In[34]:

import gradio

collection.load()

chatbot = gradio.Chatbot(color_map=("green", "gray"))
interface = gradio.Interface(
    chat,
    ["text", "state"],
    [chatbot, "state"],
    allow_screenshot=False,
    allow_flagging="never",
)
interface.launch(inline=True, share=True)
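
# In[ ]:

# Optional cleanup (not in the original notebook): once the Gradio demo is no
# longer needed, release the collection, drop the connection, and stop the
# embedded Milvus server started earlier with default_server.start(). Left
# commented out, in the same style as the optional install cells above.
# collection.release()
# connections.disconnect('default')
# default_server.stop()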