# chat_with_docs / app.py
from PyPDF2 import PdfReader, PdfWriter
import gradio as gr
from langchain.embeddings import CohereEmbeddings
from langchain.prompts import PromptTemplate
from langchain_cohere import ChatCohere
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import spacy
from dotenv import load_dotenv

# Load the spaCy model used for sentence segmentation, downloading it on first run.
try:
    nlp = spacy.load('en_core_web_md')
except OSError:
    spacy.cli.download('en_core_web_md')
    nlp = spacy.load('en_core_web_md')

# Read the Cohere API key from a local .env file or the environment.
load_dotenv()
COHERE_API_KEY = os.getenv('COHERE_API_KEY')

# Chunker and embedding model used to retrieve context from the uploaded PDF.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=0)
embedding = CohereEmbeddings(model='embed-multilingual-v3.0', cohere_api_key=COHERE_API_KEY)
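
# Retrieval in chatbot() below is two-stage: the query is first matched against
# the individual spaCy sentences of the document, and the best-matching sentence
# is then matched against the 200-character chunks; the winning chunk is passed
# to the prompt as context.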
def recieve_pdf(filename):
    """Copy the uploaded PDF locally, extract its text, and store it for querying."""
    reader = PdfReader(filename)
    writer = PdfWriter()
    for page in reader.pages:
        writer.add_page(page)
    # Save a local copy of the upload, then extract the text page by page.
    with open('processed_file.pdf', 'wb') as f:
        writer.write(f)
    read = PdfReader('processed_file.pdf')
    extracted_file = [page.extract_text() for page in read.pages]
    extracted_text = ''.join(extracted_file)
    # Keep the extracted text in a module-level variable so chatbot() can use it.
    global file
    file = extracted_text
    # summary_prompt_formated = summary_prompt.format(document = extracted_text)
    return 'Document successfully uploaded'
def chatbot(query, history):
    # Stage 1: embed the query and every sentence of the document, then find
    # the sentence most similar to the query.
    embeded_query = embedding.embed_documents([query])
    doc = nlp(file)
    sentences_1 = [str(sentence) for sentence in doc.sents]
    embedded_text = embedding.embed_documents(sentences_1)
    similarity_score = cosine_similarity(embeded_query, embedded_text)
    most_similar_index = np.argmax(similarity_score)
    most_similar_documents = sentences_1[most_similar_index]
    # Stage 2: embed the 200-character chunks and find the chunk most similar
    # to that sentence; it becomes the context for the prompt.
    splitter_text = text_splitter.split_text(file)
    recursive_embedded_text = embedding.embed_documents(splitter_text)
    most_similar_embed = embedding.embed_documents([most_similar_documents])
    final_similarity_score = cosine_similarity(most_similar_embed, recursive_embedded_text)
    final_similarity_index = np.argmax(final_similarity_score)
    final_document = splitter_text[final_similarity_index]
    prompt_formated = prompt.format(context=final_document, query=query)
    response = llm.invoke(prompt_formated).content
    # Append the new turn to the chat history and clear the input box.
    history.append((query, response))
    return '', history
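
# The summary prompt below is defined but not wired up yet: the call that would
# format it in recieve_pdf() is commented out, so the upload handler currently
# only returns a status message.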
summary_template = """You are an article summarizer and have been provided with this file:
{document}
Provide a one-line summary of the content of the provided file.
"""
summary_prompt = PromptTemplate(input_variables=['document'], template=summary_template)
template = """You are a knowledgeable chatbot that gently answers questions.
You know the following context information.
{context}
Answer the following question from a user. Use only information from the previous context. Do not invent or assume anything.
Question: {query}
Answer:"""
prompt = PromptTemplate(input_variables=['context', 'query'], template=template)
llm = ChatCohere(cohere_api_key=COHERE_API_KEY)
with gr.Blocks(theme='finlaymacklon/smooth_slate') as demo:
    signal = gr.Markdown('''# Welcome to Chat with Docs
I am an AI that receives a **PDF** and can answer questions on the content of the document.''')
    inp = gr.File()
    out = gr.Textbox(label='Summary')
    inp.upload(fn=recieve_pdf, inputs=inp, outputs=out, show_progress=True)
    signal_1 = gr.Markdown('Use the textbox below to chat. **Ask** questions regarding the PDF you uploaded.')
    chat = gr.Chatbot()
    msg = gr.Textbox(info='Type your question here')
    with gr.Row():
        submit = gr.Button('Send')
        clear = gr.ClearButton([msg, chat])
    # Pressing Enter in the textbox or clicking Send both run the chatbot.
    msg.submit(chatbot, [msg, chat], [msg, chat])
    submit.click(chatbot, [msg, chat], [msg, chat])
    feedback = gr.Markdown('# [Please use this to provide feedback](https://forms.gle/oNZKx4nL7DmmJ64g8)')

demo.launch()