Spaces:

Hamza011
/

chat_with_docs

Sleeping

App Files Files Community

chat_with_docs / app.py

Hamza011

Update app.py

440ebd9 verified about 1 year ago

raw

history blame contribute delete

4.04 kB

	from PyPDF2 import PdfReader,PdfWriter
	import gradio as gr
	from langchain.embeddings import CohereEmbeddings
	from langchain.prompts import PromptTemplate
	from langchain import OpenAI
	from langchain_cohere import ChatCohere
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	import os
	import numpy as np
	from sklearn.metrics.pairwise import cosine_similarity

	import spacy

	# Load the spaCy pipeline used for sentence segmentation in chatbot().
	# Only fall back to downloading the model when it is not installed yet,
	# instead of re-running the download on every application start.
	try:
	    nlp = spacy.load('en_core_web_md')
	except OSError:
	    # spacy.load raises OSError when the model package cannot be found.
	    spacy.cli.download("en_core_web_md")
	    nlp = spacy.load('en_core_web_md')


	from dotenv import load_dotenv

	# Pull settings from a local .env file (if any) into the process environment
	# before the API keys are read below.
	load_dotenv()

	# API credentials; either value is None when the variable is not set.
	OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
	COHERE_API_KEY = os.environ.get('COHERE_API_KEY')

	# Splitter that cuts the document into chunks of at most 200 characters
	# with no overlap; chatbot() picks the answer context from these chunks.
	text_splitter = RecursiveCharacterTextSplitter(
	    chunk_overlap=0,
	    chunk_size=200,
	)

	# Shared Cohere embedding client used for both sentences and chunks.
	embedding = CohereEmbeddings(
	    cohere_api_key=COHERE_API_KEY,
	    model='embed-multilingual-v3.0',
	)



	def recieve_pdf(filename):
	    """Extract the text of an uploaded PDF and store it for the chatbot.

	    The PDF is re-serialised through a ``PdfWriter`` and read back before
	    text extraction. The round trip happens in an in-memory buffer rather
	    than a fixed ``processed_file.pdf`` on disk, so simultaneous uploads
	    cannot overwrite each other's data and no stray file is left behind.

	    Args:
	        filename: Whatever the ``gr.File`` upload event passes in; it is
	            handed to ``PdfReader`` unchanged.

	    Returns:
	        A fixed status message shown in the output textbox.

	    Side effects:
	        Sets the module-level global ``file`` to the concatenated text of
	        all pages; ``chatbot`` reads it from there.
	    """
	    import io

	    global file

	    source_reader = PdfReader(filename)
	    writer = PdfWriter()
	    for page in source_reader.pages:
	        writer.add_page(page)

	    # Re-serialise into memory and rewind so the second reader starts at
	    # the beginning of the rewritten document.
	    buffer = io.BytesIO()
	    writer.write(buffer)
	    buffer.seek(0)

	    rewritten_reader = PdfReader(buffer)
	    # NOTE(review): the positional 0 is passed through exactly as before;
	    # in PyPDF2 3.x it appears to be taken as the `orientations` filter --
	    # confirm against the installed version.
	    # `or ''` keeps the join safe if a page yields no text (None).
	    file = ''.join(
	        page.extract_text(0) or '' for page in rewritten_reader.pages
	    )

	    return 'Document succesfully uploaded'


	def chatbot(query, history):
	    """Answer ``query`` from the uploaded document and record the exchange.

	    Retrieval runs in two stages:

	    1. The document is split into sentences with spaCy and the sentence
	       most similar to the query (cosine similarity of Cohere embeddings)
	       is selected.
	    2. The document is split into character chunks with ``text_splitter``
	       and the chunk most similar to that sentence becomes the context
	       handed to the LLM prompt.

	    Args:
	        query: The user's message from the textbox.
	        history: The chat history as a list of ``(user, bot)`` tuples; it
	            is extended in place. ``None`` is treated as an empty history.

	    Returns:
	        A ``('', history)`` tuple: the empty string for the message
	        textbox and the updated history for the chat display.
	    """
	    if history is None:
	        history = []

	    # Nothing to answer for a blank message; leave the conversation as is.
	    if not query or not query.strip():
	        return '', history

	    # `file` only exists once recieve_pdf() has run, so look it up
	    # defensively instead of raising NameError before the first upload.
	    document_text = globals().get('file')
	    if document_text is None:
	        history.append(
	            (query, 'Please upload a PDF document before asking a question.')
	        )
	        return '', history
	    if not document_text.strip():
	        history.append(
	            (query, 'No readable text could be extracted from the uploaded document.')
	        )
	        return '', history

	    # Stage 1: find the single sentence closest to the query.
	    # NOTE(review): the query is embedded with embed_documents, exactly as
	    # before; consider embed_query if the embedding client distinguishes
	    # query and document inputs -- confirm before changing.
	    query_vector = embedding.embed_documents([query])
	    sentences = [str(sentence) for sentence in nlp(document_text).sents]
	    sentence_vectors = embedding.embed_documents(sentences)
	    sentence_scores = cosine_similarity(query_vector, sentence_vectors)
	    best_sentence = sentences[int(np.argmax(sentence_scores))]

	    # Stage 2: find the text chunk closest to that sentence.
	    chunks = text_splitter.split_text(document_text)
	    chunk_vectors = embedding.embed_documents(chunks)
	    best_sentence_vector = embedding.embed_documents([best_sentence])
	    chunk_scores = cosine_similarity(best_sentence_vector, chunk_vectors)
	    context = chunks[int(np.argmax(chunk_scores))]

	    # Ask the LLM, restricting it to the selected context.
	    response = llm.invoke(prompt.format(context=context, query=query)).content

	    history.append((query, response))
	    return '', history

	# Prompt text asking for a one-line summary of a document. The only active
	# use in this file is building `summary_prompt` below.
	summary_template = """ You an article summarizer and have been provided with this file

	{document}

	provide a one line summary of the content of the provides file.

	"""

	# Template with a single placeholder, `document`.
	summary_prompt = PromptTemplate(
	    template=summary_template,
	    input_variables=['document'],
	)
	# Question-answering prompt: the model is given one retrieved context
	# passage and told to answer only from it.
	template = """ You are a knowledgeable chatbot that gently answers questions.

	You know the following context information.

	{context}

	Answer to the following question from a user. Use only information from the previous context. Do not invent or assume stuff.


	Question: {query}

	Answer:"""

	# Filled in by chatbot() with the retrieved chunk and the user's question.
	prompt = PromptTemplate(
	    template=template,
	    input_variables=['context', 'query'],
	)

	# Chat model that chatbot() invokes with the formatted prompt; the API key
	# is read from the environment at import time.
	llm = ChatCohere(cohere_api_key=os.environ.get('COHERE_API_KEY'))

	# Gradio UI: a PDF upload area with a status box, followed by a chat panel
	# whose textbox and "Send" button both route through chatbot().
	with gr.Blocks(theme='finlaymacklon/smooth_slate') as demo:
	    signal = gr.Markdown('''# Welcome to Chat with Docs
	I am an AI that recieves a PDF and can answer questions on the content of the document.''')

	    # Upload section: the file goes to recieve_pdf and its return value is
	    # displayed in the textbox.
	    inp = gr.File()
	    out = gr.Textbox(label='Summary')
	    inp.upload(
	        fn=recieve_pdf,
	        inputs=inp,
	        outputs=out,
	        show_progress=True,
	    )

	    # Chat section.
	    signal_1 = gr.Markdown('Use the Textbox below to chat. Ask questions regarding the pdf you uploaded')
	    chat = gr.Chatbot()
	    msg = gr.Textbox(info='input your chat')

	    with gr.Row():
	        submit = gr.Button('Send')
	        clear = gr.ClearButton([msg, chat])

	    # Pressing Enter in the textbox and clicking "Send" do the same thing:
	    # chatbot() returns a new textbox value and the updated chat history.
	    msg.submit(fn=chatbot, inputs=[msg, chat], outputs=[msg, chat])
	    submit.click(fn=chatbot, inputs=[msg, chat], outputs=[msg, chat])

	    feedback = gr.Markdown('# [Please use this to provide feedback](https://forms.gle/oNZKx4nL7DmmJ64g8)')

	# Start the web server for the app.
	demo.launch()