#!/usr/bin/env python
# coding: utf-8

# In[52]:

# !pip install -q pymilvus towhee gradio


# In[53]:

#!curl -L https://github.com/pankajkishore/Cognitive-Project/blob/master/latest_ticket_data.csv -O


# In[1]:

import pandas as pd

df = pd.read_csv('latest_ticket_data.csv')
df.head()


# In[2]:

df.shape


# In[3]:

# Rough length of each description (word count, capped at the character count; None for non-strings).
df['length'] = df['description'].apply(
    lambda row: min(len(row.split(" ")), len(row)) if isinstance(row, str) else None
)
df['length'].max()


# In[4]:

df.description[14]


# In[5]:

df.shape


# In[6]:

# Lookup tables from ticket id to category / description.
id_category = df.set_index('id')['category'].to_dict()


# In[7]:

id_description = df.set_index('id')['description'].to_dict()


# In[8]:

id_description[12]


# In[9]:

id_category[10]


# In[11]:

# Start the embedded Milvus server.
from milvus import default_server
from pymilvus import connections, utility

default_server.start()


# In[12]:

from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility


# In[24]:

# Milvus connection parameters
connections.connect(host='127.0.0.1', port='19530')


# In[25]:

default_server.listen_port


# In[17]:

def create_milvus_collection(collection_name, dim):
    """Create (or recreate) a Milvus collection with an id field and an embedding field."""
    connections.connect(host='127.0.0.1', port='19530')
    if utility.has_collection(collection_name):
        utility.drop_collection(collection_name)

    fields = [
        FieldSchema(name='id', dtype=DataType.VARCHAR, description='ids',
                    max_length=500, is_primary=True, auto_id=False),
        FieldSchema(name='embedding', dtype=DataType.FLOAT_VECTOR,
                    description='embedding vectors', dim=dim)
    ]
    schema = CollectionSchema(fields=fields, description='reverse text search')
    collection = Collection(name=collection_name, schema=schema)

    # Create an IVF_FLAT index on the embedding field.
    index_params = {
        'metric_type': 'L2',
        'index_type': 'IVF_FLAT',
        'params': {'nlist': 2048}
    }
    collection.create_index(field_name='embedding', index_params=index_params)
    return collection


# In[18]:

collection = create_milvus_collection('latest_ticket_data', 768)


# In[19]:

collection.load()


# In[26]:

from towhee import pipe, ops
import numpy as np
from towhee.datacollection import DataCollection

# Insertion pipeline: embed each description with DPR, normalize the vector, and insert it into Milvus.
insert_pipe = (
    pipe.input('id', 'description', 'category')
        .map('description', 'vec', ops.text_embedding.dpr(model_name='facebook/dpr-ctx_encoder-single-nq-base'))
        .map('vec', 'vec', lambda x: x / np.linalg.norm(x, axis=0))
        .map(('id', 'vec'), 'insert_status',
             ops.ann_insert.milvus_client(host='127.0.0.1', port='19530',
                                          collection_name='latest_ticket_data'))
        .output()
)


# In[ ]:

# Note: descriptions exceeding the DPR/BERT 512-token limit raised this error during insertion:
# File "/Users/www.abcom.in/Documents/milvus/.milvusenv/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py", line 238, in forward
#     embeddings += position_embeddings
# RuntimeError: The size of tensor a (1002) must match the size of tensor b (512) at non-singleton dimension 1


# In[27]:

import csv

with open('latest_ticket_data.csv', encoding='utf-8') as f:
    reader = csv.reader(f)
    next(reader)  # skip the header row
    for row in reader:
        insert_pipe(*row)


# In[28]:

collection.load()


# In[29]:

print('Total number of inserted data is {}.'.format(collection.num_entities))
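# In[ ]:

# Optional sanity check (not in the original notebook): query the collection
# directly with pymilvus to confirm the index is serving. The random query
# vector below is only a placeholder to exercise the index; real queries
# should use DPR embeddings, as in the search pipeline defined next.
sanity_vec = np.random.rand(768).astype('float32')
sanity_hits = collection.search(
    data=[sanity_vec.tolist()],
    anns_field='embedding',
    param={'metric_type': 'L2', 'params': {'nprobe': 16}},
    limit=3,
)
for hit in sanity_hits[0]:
    print(hit.id, hit.distance)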
# In[30]:

# Search pipeline: embed the query, retrieve the nearest ticket, and map its id back to a category.
ans_pipe = (
    pipe.input('description')
        .map('description', 'vec', ops.text_embedding.dpr(model_name='facebook/dpr-ctx_encoder-single-nq-base'))
        .map('vec', 'vec', lambda x: x / np.linalg.norm(x, axis=0))
        .map('vec', 'res', ops.ann_search.milvus_client(host='127.0.0.1', port='19530',
                                                        collection_name='latest_ticket_data', limit=1))
        .map('res', 'category', lambda x: [id_category[int(i[0])] for i in x])
        .output('description', 'category')
)


# In[31]:

ans = ans_pipe('report hi please attached report user take appropriate actions order agent her computer')


# In[32]:

ans = DataCollection(ans)
ans.show()


# In[33]:

import towhee

def chat(message, history):
    """Classify the incoming message with the search pipeline and append the result to the chat history."""
    history = history or []
    ans_pipe = (
        pipe.input('description')
            .map('description', 'vec', ops.text_embedding.dpr(model_name='facebook/dpr-ctx_encoder-single-nq-base'))
            .map('vec', 'vec', lambda x: x / np.linalg.norm(x, axis=0))
            .map('vec', 'res', ops.ann_search.milvus_client(host='127.0.0.1', port='19530',
                                                            collection_name='latest_ticket_data', limit=1))
            .map('res', 'category', lambda x: [id_category[int(i[0])] for i in x])
            .output('description', 'category')
    )
    response = ans_pipe(message).get()[1][0]
    history.append((message, response))
    return history, history


# In[34]:

import gradio

collection.load()

chatbot = gradio.Chatbot(color_map=("green", "gray"))
interface = gradio.Interface(
    chat,
    ["text", "state"],
    [chatbot, "state"],
    allow_screenshot=False,
    allow_flagging="never",
)
interface.launch(inline=True, share=True)
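
# In[ ]:

# Optional cleanup (not in the original notebook): once the Gradio demo is no
# longer needed, release the collection, drop the connection, and stop the
# embedded Milvus server started earlier with default_server.start(). Left
# commented out, in the same style as the optional install cells above.
# collection.release()
# connections.disconnect('default')
# default_server.stop()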