# chat_with_docs / app.py
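# Gradio app: upload a PDF, then chat with a Cohere LLM that answers questions
# grounded in the document's text, using Cohere embeddings and cosine similarity for retrieval.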
from PyPDF2 import PdfReader, PdfWriter
import gradio as gr
from langchain.prompts import PromptTemplate
from langchain_cohere import ChatCohere, CohereEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import spacy

# Download the medium English spaCy model at startup; it is only used for sentence segmentation.
spacy.cli.download("en_core_web_md")
nlp = spacy.load("en_core_web_md")
from dotenv import load_dotenv

load_dotenv()
COHERE_API_KEY = os.getenv('COHERE_API_KEY')
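
# Split documents into 200-character chunks with no overlap; use Cohere's multilingual embeddings for retrieval.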
text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=0)
embedding = CohereEmbeddings(model='embed-multilingual-v3.0', cohere_api_key=COHERE_API_KEY)


def recieve_pdf(filename):
    # Copy the uploaded PDF to a local file, then extract its text page by page.
    reader = PdfReader(filename)
    writer = PdfWriter()
    for page in reader.pages:
        writer.add_page(page)
    with open('processed_file.pdf', 'wb') as f:
        writer.write(f)
    read = PdfReader('processed_file.pdf')
    extracted_file = [page.extract_text() for page in read.pages]
    extracted_text = ''.join(extracted_file)
    # Keep the full document text in a module-level variable so chatbot() can read it.
    global file
    file = extracted_text
    # summary_prompt_formated = summary_prompt.format(document = extracted_text)
    return 'Document successfully uploaded'
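

# Two-pass retrieval: first find the sentence most similar to the query,
# then find the 200-character chunk most similar to that sentence and use it as the prompt context.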
def chatbot(query, history):
    # Embed the query and every sentence of the uploaded document.
    embedded_query = embedding.embed_documents([query])
    doc = nlp(file)
    sentences_1 = [str(sentence) for sentence in doc.sents]
    embedded_text = embedding.embed_documents(sentences_1)
    # Pick the sentence closest to the query.
    similarity_score = cosine_similarity(embedded_query, embedded_text)
    most_similar_index = np.argmax(similarity_score)
    most_similar_documents = sentences_1[most_similar_index]
    # Pick the splitter chunk closest to that sentence and use it as context.
    splitter_text = text_splitter.split_text(file)
    recursive_embedded_text = embedding.embed_documents(splitter_text)
    most_similar_embed = embedding.embed_documents([most_similar_documents])
    final_similarity_score = cosine_similarity(most_similar_embed, recursive_embedded_text)
    final_similarity_index = np.argmax(final_similarity_score)
    final_document = splitter_text[final_similarity_index]
    prompt_formatted = prompt.format(context=final_document, query=query)
    response = llm.invoke(prompt_formatted).content
    history.append((query, response))
    return '', history
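

# Prompt templates: a one-line summary prompt (currently unused) and a grounded question-answering prompt.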
summary_template = """ You an article summarizer and have been provided with this file
{document}
provide a one line summary of the content of the provides file.
"""
summary_prompt = PromptTemplate(input_variables= ['document'], template=summary_template)
template = """ You are a knowledgeable chatbot that gently answers questions.
You know the following context information.
{context}
Answer to the following question from a user. Use only information from the previous context. Do not invent or assume stuff.
Question: {query}
Answer:"""
prompt = PromptTemplate(input_variables= ['context', 'query'], template= template)
llm = ChatCohere(cohere_api_key=COHERE_API_KEY)
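
# Gradio UI: PDF upload with a status textbox, a chat window with a message box, and Send/Clear buttons.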
with gr.Blocks(theme='finlaymacklon/smooth_slate') as demo:
    signal = gr.Markdown('''# Welcome to Chat with Docs
I am an AI that receives a **PDF** and can answer questions on the content of the document.''')
    inp = gr.File()
    out = gr.Textbox(label='Summary')
    inp.upload(fn=recieve_pdf, inputs=inp, outputs=out, show_progress=True)
    signal_1 = gr.Markdown('Use the textbox below to chat. **Ask** questions about the PDF you uploaded.')
    chat = gr.Chatbot()
    msg = gr.Textbox(info='Type your question')
    with gr.Row():
        submit = gr.Button('Send')
        clear = gr.ClearButton([msg, chat])
    msg.submit(chatbot, [msg, chat], [msg, chat])
    submit.click(chatbot, [msg, chat], [msg, chat])
    feedback = gr.Markdown('# [Please use this to provide feedback](https://forms.gle/oNZKx4nL7DmmJ64g8)')

demo.launch()