|
from llama_index import SimpleDirectoryReader, LLMPredictor, PromptHelper, StorageContext, ServiceContext, GPTVectorStoreIndex, load_index_from_storage |
|
from langchain.chat_models import ChatOpenAI |
|
import gradio as gr |
|
import sys |
|
import os |
|
import pinecone |
|
from langchain.vectorstores import Pinecone |
|
from langchain.chains import RetrievalQA |
|
from langchain.llms import OpenAI |
|
from langchain.embeddings.openai import OpenAIEmbeddings |
|
import time |
|
import numpy as np |
|
from io import StringIO |
|
import pandas as pd |
|
|
|
|
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
import re |
|
import asyncio |
|
import datetime |
|
|
|
|
|
# SECURITY: hard-coded OpenAI and Pinecone credentials are committed here.
# These should be read from the environment / a secrets manager and the
# leaked keys rotated. Kept in place only to preserve runtime behavior.
# (BUG FIX: this assignment was previously duplicated further down.)
os.environ["OPENAI_API_KEY"] = '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'

# Mutable module-level state shared with the Gradio callbacks below.
docsearch = ''
llm = ''
qa_with_sources = ''
# Captured from the first chat message; used to tag stored questions.
email = ''

# Per-index Pinecone connection settings, keyed by index name.
pinecone_indexes = {
    'ouroffice': {'api_key': '404d3a61-d813-494c-99c2-a426f91c1523', 'environment': "asia-southeast1-gcp-free"},
    'questions': {'api_key': '3941be85-a33a-421c-b84b-e05311f5f250', 'environment': "asia-southeast1-gcp-free"},
}

# Shared embedding model used by the retrieval helpers.
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
|
|
|
|
|
|
|
|
|
def extract_email(string):
    """Return the first email address found in *string*, or False if none.

    Matches a conventional user@domain.tld pattern on word boundaries.
    """
    found = re.search(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', string)
    if found is None:
        return False
    return found.group()
|
|
|
def create_row_from_vector(vectors):
    """Convert scored similarity hits into table rows for the UI dataframe.

    Each element of *vectors* is a (document, score) pair; only hits with
    score above 0.72 are kept, emitted as [text, score, userId, date].
    """
    rows = []
    for hit in vectors:
        print('vector--- ', hit)
        doc, score = hit
        # 0.72 is the relevancy cutoff for "similar enough" questions.
        if score > 0.72:
            rows.append([doc.page_content, score, doc.metadata['userId'], doc.metadata['date']])
    return rows
|
|
|
|
|
def retrieve_only_docs(index, embeddings, query):
    """Run a raw similarity search against *index* and return scored hits.

    Returns the (document, score) pairs produced by
    ``similarity_search_with_score`` — no LLM involved.
    """
    cfg = pinecone_indexes[index]
    pinecone.init(api_key=cfg['api_key'], environment=cfg['environment'])
    store = Pinecone.from_existing_index(index, embeddings)
    return store.similarity_search_with_score(query)
|
|
|
|
|
def data_querying(input_text, index):
    """Answer *input_text* against the Pinecone index named *index*.

    For the 'questions' index: return the raw (document, score) pairs from
    a similarity search — the UI consumes those directly, no LLM needed.

    For any other index: run a RetrievalQA chain over the index and return
    the answer text followed by a de-duplicated "See sources" list of
    source URLs rendered as markdown links. If the chain yields no source
    documents, the raw chain result is returned unchanged.
    """
    pinecone.init(api_key=pinecone_indexes[index]['api_key'],
                  environment=pinecone_indexes[index]['environment']
                  )
    embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

    if index == 'questions':
        # Early exit — avoids constructing the retriever/LLM the original
        # built and then discarded on this path.
        return retrieve_only_docs(index, embeddings, input_text)

    docsearch = Pinecone.from_existing_index(index, embeddings)
    llm = OpenAI()
    qa_sources = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=docsearch.as_retriever(),
        return_source_documents=True,
    )
    result = qa_sources({"query": input_text})
    source_docs = result["source_documents"]
    if not source_docs:
        # No provenance available — hand back the raw chain output.
        return result

    # BUG FIX: the original built the markdown link (url_md) but then
    # appended the bare URL, silently dropping the link formatting. Track
    # seen raw URLs separately so de-duplication still works.
    # (Also removed an inner index=='questions' branch that was unreachable
    # after the early return above.)
    seen = set()
    links = []
    for element in source_docs:
        url_el = element.metadata['source']
        if url_el not in seen:
            seen.add(url_el)
            links.append("[" + url_el + "](" + url_el + ")")
    url_str = ",\n".join(links)
    return result["result"] + '\n\nSee sources: \n' + url_str
|
|
|
|
|
def vectorize_question(query, index, email):
    """Persist *query* in the given Pinecone index, tagged with asker and time.

    Embeds the question text and stores it with 'userId' (the asker's
    email) and 'date' metadata so later searches can surface who asked
    similar questions and when.
    """
    embeddings = OpenAIEmbeddings()
    pinecone.init(
        api_key=pinecone_indexes[index]['api_key'],
        environment=pinecone_indexes[index]['environment']
    )
    pine_index = pinecone.Index(index)
    store = Pinecone(pine_index, embeddings.embed_query, 'text')
    # NOTE(review): 'date' is stored as a naive datetime object — Pinecone
    # metadata normally expects strings/numbers; confirm the client
    # serializes this as intended.
    metadata = {
        'date': datetime.datetime.now(),
        'userId': email,
    }
    store.add_texts(texts=[query], metadatas=[metadata])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def upload_file(files):
    """Return the on-disk paths of the uploaded file handles."""
    paths = []
    for handle in files:
        paths.append(handle.name)
    return paths
|
|
|
def process_csv_text(file):
    """Load the uploaded CSV into a DataFrame, replacing missing cells with 0."""
    frame = pd.read_csv(file.name, delimiter=',')
    return frame.fillna(0)
|
|
|
|
|
|
|
|
|
# --- Gradio UI -------------------------------------------------------------
# Two tabs: a chatbot that first captures the user's email and then answers
# questions (showing similar past questions alongside), and a CSV preview tab.
with gr.Blocks() as demo:
    gr.Markdown(
    """
    # Scoop - let's learn about OurOffice together
    ## together we ask to finish the task!
    #### Two heads are better than one
    """)

    # Tracks whether the user has already provided an email; flipped to
    # True inside respond() once a valid address is seen.
    txt_vis=False

    with gr.Tab("Chatbot"):
        with gr.Row():
            with gr.Column():
                chatbot = gr.Chatbot(value=[[ None, "Hi there! Please provide your email to get started :)!"]])
                msg = gr.Textbox()
            with gr.Column():
                # Side table of similar previously-asked questions, filled
                # by create_row_from_vector() on each answered message.
                similar_tbl=gr.Dataframe(
                    headers=["question", 'Relevancy Score',"Email", "Date"],
                    col_count=(4, "fixed"),
                    interactive=False
                )
        clear = gr.ClearButton([msg, chatbot])

        def respond(message, chat_history):
            """Chat callback: collect the user's email first, then answer.

            Returns (new_textbox_value, chat_history, similar_questions_rows);
            the rows output is None until the email has been captured.
            Mutates module-level `txt_vis` and `email`.
            """
            global txt_vis
            global email
            if txt_vis==False:
                # Phase 1: keep prompting until a parsable email arrives.
                email = extract_email(message)
                if email == False:
                    bot_message='Please provide your email.'
                    chat_history.append((message, bot_message))
                    return '',chat_history, None
                else:
                    bot_message= f"Hi {email}! Please ask any questions you have about OurOffice.io that could be found on its website."
                    chat_history.append((message, bot_message))
                    txt_vis=True
                    return '',chat_history, None
            else:
                # Phase 2: answer from the 'ouroffice' index, look up similar
                # past questions, and persist this question for future users.
                bot_message = data_querying(message, 'ouroffice')
                similar_questions= data_querying(message, 'questions')
                print(f"similar questions--- {similar_questions}")
                tbl_data = create_row_from_vector(similar_questions)
                print(f"tbl data--- {tbl_data}")
                chat_history.append((message, bot_message))
                question_saved = vectorize_question(message, 'questions',email)

                return "", chat_history, tbl_data

        msg.submit(respond, [msg, chatbot], [msg, chatbot, similar_tbl])

    with gr.Tab("CSV Analysis"):
        file_output = gr.File()
        upload_button = gr.UploadButton("Click to Upload a File", file_types=[".csv"], file_count="multiple")
        upload_button.upload(upload_file, upload_button, file_output)
        result = gr.Dataframe()

        # Render the uploaded CSV as a dataframe whenever the file changes.
        file_output.change(fn=process_csv_text,inputs=file_output, outputs=result)


demo.launch(debug=True)