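# Chat with Docs: a Gradio Space that accepts an uploaded PDF and answers
# questions about its content using Cohere embeddings and chat.
# Assumed environment (inferred from the imports and os.getenv calls below,
# not pinned anywhere in this file): gradio, PyPDF2, langchain, langchain-cohere,
# spacy, scikit-learn, numpy and python-dotenv installed, plus a COHERE_API_KEY
# provided via a .env file or Space secret.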
from PyPDF2 import PdfReader, PdfWriter
import gradio as gr
from langchain.prompts import PromptTemplate
from langchain_cohere import ChatCohere, CohereEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import spacy

# Download the spaCy English model at startup so a fresh container has it.
spacy.cli.download("en_core_web_md")
nlp = spacy.load('en_core_web_md')

from dotenv import load_dotenv
load_dotenv()
COHERE_API_KEY = os.getenv('COHERE_API_KEY')

# Splitter for the chunk-level retrieval pass and the Cohere embedding model.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=0)
embedding = CohereEmbeddings(model='embed-multilingual-v3.0', cohere_api_key=COHERE_API_KEY)
def recieve_pdf(filename):
    # Copy the uploaded PDF to a local file, then pull the text out of it.
    reader = PdfReader(filename)
    writer = PdfWriter()
    for page in reader.pages:
        writer.add_page(page)
    with open('processed_file.pdf', 'wb') as f:
        writer.write(f)
    read = PdfReader('processed_file.pdf')
    extracted_file = [page.extract_text() for page in read.pages]
    extracted_text = ''.join(extracted_file)
    # Keep the extracted text in a module-level global so chatbot() can read it.
    global file
    file = extracted_text
    # summary_prompt_formated = summary_prompt.format(document=extracted_text)
    return 'Document successfully uploaded'
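# The chat handler below does a simple two-stage retrieval over the extracted
# text: it first embeds every spaCy sentence and picks the one most similar to
# the query, then embeds the 200-character chunks produced by the text splitter
# and picks the chunk most similar to that sentence. That chunk becomes the
# context passed to the Cohere chat model.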
def chatbot(query, history):
    # Sentence-level pass: embed the query and every spaCy sentence, then pick
    # the sentence most similar to the query.
    embedded_query = embedding.embed_documents([query])
    doc = nlp(file)
    sentences = [str(sentence) for sentence in doc.sents]
    embedded_sentences = embedding.embed_documents(sentences)
    similarity_scores = cosine_similarity(embedded_query, embedded_sentences)
    most_similar_sentence = sentences[int(np.argmax(similarity_scores))]
    # Chunk-level pass: embed the 200-character chunks and pick the chunk most
    # similar to that sentence; it becomes the context for the LLM.
    chunks = text_splitter.split_text(file)
    embedded_chunks = embedding.embed_documents(chunks)
    embedded_sentence = embedding.embed_documents([most_similar_sentence])
    final_similarity_scores = cosine_similarity(embedded_sentence, embedded_chunks)
    final_document = chunks[int(np.argmax(final_similarity_scores))]
    # Ask the Cohere chat model to answer using only the retrieved chunk.
    prompt_formated = prompt.format(context=final_document, query=query)
    response = llm.invoke(prompt_formated).content
    history.append((query, response))
    return '', history
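# Prompt templates and LLM setup. The summary prompt is defined but currently
# unused: the call that would format it in recieve_pdf is commented out above.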
summary_template = """You are an article summarizer and have been provided with this file:
{document}
Provide a one-line summary of the content of the provided file.
"""
summary_prompt = PromptTemplate(input_variables=['document'], template=summary_template)

template = """You are a knowledgeable chatbot that gently answers questions.
You know the following context information.
{context}
Answer the following question from a user. Use only information from the previous context. Do not invent or assume anything.
Question: {query}
Answer:"""
prompt = PromptTemplate(input_variables=['context', 'query'], template=template)

llm = ChatCohere(cohere_api_key=os.getenv('COHERE_API_KEY'))
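# Gradio UI: a file upload wired to recieve_pdf, a textbox labelled 'Summary'
# that currently shows the upload status message, a chat area with a message
# box plus Send/Clear buttons wired to chatbot(), and a feedback link.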
with gr.Blocks(theme='finlaymacklon/smooth_slate') as demo:
    signal = gr.Markdown('''# Welcome to Chat with Docs
I am an AI that receives a **PDF** and can answer questions about the content of the document.''')
    inp = gr.File()
    out = gr.Textbox(label='Summary')
    inp.upload(fn=recieve_pdf, inputs=inp, outputs=out, show_progress=True)
    signal_1 = gr.Markdown('Use the textbox below to chat. **Ask** questions regarding the PDF you uploaded.')
    chat = gr.Chatbot()
    msg = gr.Textbox(info='Type your question about the uploaded PDF')
    with gr.Row():
        submit = gr.Button('Send')
        clear = gr.ClearButton([msg, chat])
    msg.submit(chatbot, [msg, chat], [msg, chat])
    submit.click(chatbot, [msg, chat], [msg, chat])
    feedback = gr.Markdown('# [Please use this to provide feedback](https://forms.gle/oNZKx4nL7DmmJ64g8)')
demo.launch()