Spaces:
Running
Running
from PyPDF2 import PdfReader,PdfWriter | |
import gradio as gr | |
from langchain.embeddings import CohereEmbeddings | |
from langchain.prompts import PromptTemplate | |
from langchain import OpenAI | |
from langchain_cohere import ChatCohere | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
import os | |
import numpy as np | |
from sklearn.metrics.pairwise import cosine_similarity | |
import spacy | |
# Load the spaCy English model used to split the document into sentences.
# Try the installed model first and only download it when it is missing, so
# that a restart with the model already present does not hit the network.
try:
    nlp = spacy.load('en_core_web_md')
except OSError:
    # spacy.load raises OSError when the model package cannot be found.
    spacy.cli.download("en_core_web_md")
    nlp = spacy.load('en_core_web_md')
from dotenv import load_dotenv | |
# Read variables from a local .env file (when present) into the environment,
# then pick up the API keys the rest of the module uses.
load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
COHERE_API_KEY = os.environ.get('COHERE_API_KEY')

# Splitter used to cut the document into chunks (chunk_size=200, no overlap).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=200,
)

# Cohere embedding client shared by the retrieval code.
embedding = CohereEmbeddings(
    cohere_api_key=COHERE_API_KEY,
    model='embed-multilingual-v3.0',
)
def recieve_pdf(filename):
    """Extract the text of an uploaded PDF and store it for the chatbot.

    The pages are copied through a ``PdfWriter`` and re-read before text
    extraction. The intermediate copy lives in an in-memory buffer instead of
    a fixed file on disk, so simultaneous uploads cannot overwrite each
    other's data and no stray file is left in the working directory.

    Args:
        filename: The value delivered by the ``gr.File`` upload event; it is
            passed straight to ``PdfReader``.

    Returns:
        A status message for the UI.

    Side effects:
        Sets the module-level ``file`` variable to the extracted text, which
        ``chatbot`` reads.
    """
    import io

    global file

    reader = PdfReader(filename)
    writer = PdfWriter()
    for page in reader.pages:
        writer.add_page(page)

    # Round-trip through memory rather than 'processed_file.pdf'.
    buffer = io.BytesIO()
    writer.write(buffer)
    buffer.seek(0)

    processed = PdfReader(buffer)
    # NOTE(review): the positional 0 is kept unchanged from the previous
    # call; confirm what it selects in the installed PyPDF2 version.
    # `or ''` guards against a page that yields no text at all.
    pages_text = [page.extract_text(0) or '' for page in processed.pages]

    # Separate pages with a newline so the last word of one page is not fused
    # with the first word of the next.
    file = '\n'.join(pages_text)
    return 'Document succesfully uploaded'
def chatbot(query, history):
    """Answer ``query`` from the uploaded document and record the exchange.

    Retrieval happens in two stages:

    1. The document is split into sentences with spaCy and the sentence whose
       embedding is most similar to the query embedding is selected.
    2. The document is split into chunks with ``text_splitter`` and the chunk
       most similar to that sentence is used as the context for the LLM.

    Args:
        query: The user's question.
        history: List of ``(user_message, bot_message)`` tuples. It is
            extended in place; ``None`` is treated as an empty history.

    Returns:
        A ``('', history)`` tuple. The event wiring maps it onto the message
        box and the chat component, so the empty string clears the input.
    """
    if history is None:
        history = []

    # A blank message has nothing to retrieve against; skip the API calls.
    if not query or not query.strip():
        return '', history

    # `file` only exists after a successful upload, so look it up defensively
    # instead of failing with a NameError.
    document = globals().get('file')
    if not document or not document.strip():
        history.append(
            (query, 'Please upload a PDF document before asking questions.')
        )
        return '', history

    # Stage 1: best-matching sentence for the query.
    sentences = [str(sentence) for sentence in nlp(document).sents]
    query_vector = embedding.embed_documents([query])
    sentence_vectors = embedding.embed_documents(sentences)
    # cosine_similarity returns one row per query; argmax over the flattened
    # array is therefore the index of the best sentence.
    sentence_scores = cosine_similarity(query_vector, sentence_vectors)
    best_sentence = sentences[int(np.argmax(sentence_scores))]

    # Stage 2: chunk that best matches the selected sentence.
    chunks = text_splitter.split_text(document)
    chunk_vectors = embedding.embed_documents(chunks)
    best_sentence_vector = embedding.embed_documents([best_sentence])
    chunk_scores = cosine_similarity(best_sentence_vector, chunk_vectors)
    context = chunks[int(np.argmax(chunk_scores))]

    response = llm.invoke(prompt.format(context=context, query=query)).content
    history.append((query, response))
    return '', history
# Prompt text for a one-line summary of a document; takes `document`.
summary_template = """ You an article summarizer and have been provided with this file
{document}
provide a one line summary of the content of the provides file.
"""
summary_prompt = PromptTemplate(
    template=summary_template,
    input_variables=['document'],
)

# Prompt text for question answering; takes the retrieved `context` and the
# user's `query`.
template = """ You are a knowledgeable chatbot that gently answers questions.
You know the following context information.
{context}
Answer to the following question from a user. Use only information from the previous context. Do not invent or assume stuff.
Question: {query}
Answer:"""
prompt = PromptTemplate(
    template=template,
    input_variables=['context', 'query'],
)

# Cohere chat model that receives the formatted question-answering prompt.
llm = ChatCohere(cohere_api_key=os.getenv('COHERE_API_KEY'))
# Gradio front end: a PDF upload area followed by a chat panel.
# NOTE(review): the nesting below was reconstructed from the statements
# themselves - confirm that both buttons belong inside the Row.
with gr.Blocks(theme='finlaymacklon/smooth_slate') as demo:
    signal = gr.Markdown('''# Welcome to Chat with Docs
I am an AI that recieves a **PDF** and can answer questions on the content of the document.''')

    # Upload widget; the string returned by recieve_pdf goes to the textbox.
    inp = gr.File()
    out = gr.Textbox(label='Summary')
    inp.upload(
        fn=recieve_pdf,
        inputs=inp,
        outputs=out,
        show_progress=True,
    )

    signal_1 = gr.Markdown(
        'Use the Textbox below to chat. **Ask** questions regarding the pdf you uploaded'
    )
    chat = gr.Chatbot()
    msg = gr.Textbox(info='input your chat')
    with gr.Row():
        submit = gr.Button('Send')
        clear = gr.ClearButton([msg, chat])

    # Pressing Enter in the textbox and clicking Send run the same handler.
    for trigger in (msg.submit, submit.click):
        trigger(chatbot, [msg, chat], [msg, chat])

    feedback = gr.Markdown(
        '# [Please use this to provide feedback](https://forms.gle/oNZKx4nL7DmmJ64g8)'
    )

demo.launch()