Chandranshu Jain committed on
Commit
9d8ec30
1 Parent(s): 58581ea

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -0
app.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PyPDF2 import PdfReader
3
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
4
+ import os
5
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
6
+ from langchain_community.vectorstores import Chroma
7
+ from langchain_google_genai import ChatGoogleGenerativeAI
8
+ from langchain.chains.question_answering import load_qa_chain
9
+ from langchain.prompts import PromptTemplate
10
+ from langchain_community.document_loaders import PyPDFLoader
11
+ from langchain_chroma import Chroma
12
+ import tempfile
13
+ from langchain_cohere import CohereEmbeddings
14
+
15
+ #st.set_page_config(page_title="Document Genie", layout="wide")
16
+
17
+ #st.markdown("""
18
+ ### PDFChat: Get instant insights from your PDF
19
+
20
+ #This chatbot is built using the Retrieval-Augmented Generation (RAG) framework, leveraging Google's Generative AI model Gemini-PRO. It processes uploaded PDF documents by breaking them down into manageable chunks, creates a searchable vector store, and generates accurate answers to user queries. This advanced approach ensures high-quality, contextually relevant responses for an efficient and effective user experience.
21
+
22
+ #### How It Works
23
+
24
+ #Follow these simple steps to interact with the chatbot:
25
+
26
+ #1. **Upload Your Document**: The system accepts a PDF file at one time, analyzing the content to provide comprehensive insights.
27
+
28
+ #2. **Ask a Question**: After processing the document, ask any question related to the content of your uploaded document for a precise answer.
29
+ #""")
30
+
31
+ #def get_pdf(pdf_docs):
32
+ # loader = PyPDFLoader(pdf_docs)
33
+ # docs = loader.load()
34
+ # return docs
35
+
36
def get_pdf(uploaded_file):
    """Persist an uploaded PDF to a temporary file and load it as documents.

    Args:
        uploaded_file: A Streamlit ``UploadedFile`` (or ``None``/falsy when
            nothing has been uploaded yet).

    Returns:
        A list of langchain ``Document`` objects, one per PDF page, or
        ``None`` when no file was provided.
    """
    if not uploaded_file:
        return None
    # Write the upload to a unique temporary file: PyPDFLoader needs a
    # filesystem path.  The previous fixed "./temp.pdf" path was shared by
    # every concurrent session of the app and could be clobbered mid-read.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        tmp.write(uploaded_file.getvalue())
        temp_path = tmp.name
    try:
        loader = PyPDFLoader(temp_path)
        docs = loader.load()
    finally:
        # Remove the temp file once the pages are loaded into memory.
        if os.path.exists(temp_path):
            os.remove(temp_path)
    return docs
48
+
49
def text_splitter(text):
    """Split loaded documents into overlapping chunks for embedding.

    Args:
        text: A list of langchain ``Document`` objects (as returned by
            ``get_pdf``).

    Returns:
        A list of ``Document`` chunks of at most ``chunk_size`` characters.
    """
    # Renamed the local so it no longer shadows this function's own name.
    # Large chunks (100k chars) with 50% overlap keep most of a typical PDF
    # within one or two chunks.  (The old comment claiming "a really small
    # chunk size, just to show" was stale.)
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=100000,
        chunk_overlap=50000,
        # Prefer paragraph, then line, then word, then punctuation breaks.
        separators=["\n\n", "\n", " ", ".", ","],
    )
    return splitter.split_documents(text)
57
+
58
# API credentials are read from the environment so secrets never live in
# the source tree; either may be None if the variable is unset.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
COHERE_API_KEY = os.environ.get("COHERE_API_KEY")
60
+
61
def get_conversational_chain():
    """Build a "stuff"-type QA chain backed by Gemini.

    Returns:
        A langchain question-answering chain expecting ``input_documents``
        and ``question`` keys, producing an ``output_text`` answer.
    """
    template = """
    Given the following extracted parts of a long document and a question, create a final answer.
    Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
    provided context just say, "answer is not available in the context", and then ignore the context and add the answer from your knowledge like a simple llm prompt.
    Try to give atleast the basic information.Donot return blank answer.\n\n
    Make sure to understand the question and answer as per the question.
    The answer should be a detailed one and try to incorporate examples for better understanding.
    If the question involves terms like detailed or explained , give answer which involves complete detail about the question.\n\n
    Context:\n {context}?\n
    Question: \n{question}\n
    Answer:
    """
    qa_prompt = PromptTemplate(
        template=template,
        input_variables=["context", "question"],
    )
    # Low temperature keeps answers grounded in the retrieved context.
    llm = ChatGoogleGenerativeAI(
        model="gemini-1.0-pro-latest",
        temperature=0.3,
        google_api_key=GOOGLE_API_KEY,
    )
    return load_qa_chain(llm, chain_type="stuff", prompt=qa_prompt)
79
+
80
def embedding(chunk, query):
    """Embed document chunks, retrieve the relevant ones, and answer a query.

    Args:
        chunk: List of ``Document`` chunks produced by ``text_splitter``.
        query: The user's question as a plain string.

    Returns:
        The generated answer text (``output_text`` from the QA chain).
    """
    # Cohere embeddings are used; the Google embedding model remains an
    # easy swap-in alternative.
    embeddings = CohereEmbeddings(model="embed-english-v3.0")
    # NOTE(review): the vector store is rebuilt on every question.  That is
    # acceptable for a single small PDF but re-embeds the document per query;
    # caching the store per uploaded file would avoid the repeated cost.
    db = Chroma.from_documents(chunk, embeddings)
    docs = db.similarity_search(query)
    chain = get_conversational_chain()
    # Debug print() calls removed; they leaked retrieved chunks and full
    # responses to stdout in production.
    response = chain({"input_documents": docs, "question": query},
                     return_only_outputs=True)
    return response["output_text"]
91
+
92
# --- Streamlit chat UI ------------------------------------------------------

# Seed the chat history with a greeting on the first run of the session.
if 'messages' not in st.session_state:
    st.session_state.messages = [{'role': 'assistant', "content": 'Hello! Upload a PDF and ask me anything about its content.'}]

st.header("Chat with your pdf💁")

with st.sidebar:
    st.title("PDF FILE UPLOAD:")
    pdf_docs = st.file_uploader("Upload your PDF File and Click on the Submit & Process Button", accept_multiple_files=False, key="pdf_uploader")

query = st.chat_input("Ask a Question from the PDF File")
if query:
    # Record the user's message first so it is kept even if processing fails.
    st.session_state.messages.append({'role': 'user', "content": query})
    raw_text = get_pdf(pdf_docs)
    if raw_text is None:
        # Previously, asking a question before uploading a PDF crashed with a
        # TypeError inside text_splitter; answer gracefully instead.
        response = "Please upload a PDF file first."
    else:
        text_chunks = text_splitter(raw_text)
        response = embedding(text_chunks, query)
    st.session_state.messages.append({'role': 'assistant', "content": response})

# Replay the full conversation so the newest exchange is rendered too.
for message in st.session_state.messages:
    with st.chat_message(message['role']):
        st.write(message['content'])