AkashVD26 commited on
Commit
84783e8
β€’
1 Parent(s): 399812e

Added files

Browse files
Files changed (3) hide show
  1. .gitignore +2 -0
  2. app.py +117 -0
  3. requirements.txt +14 -0
.gitignore CHANGED
@@ -7,6 +7,7 @@ __pycache__/
7
  *.so
8
 
9
  # Distribution / packaging
 
10
  .Python
11
  build/
12
  develop-eggs/
@@ -122,6 +123,7 @@ celerybeat.pid
122
  *.sage.py
123
 
124
  # Environments
 
125
  .env
126
  .venv
127
  env/
 
7
  *.so
8
 
9
  # Distribution / packaging
10
+ research/
11
  .Python
12
  build/
13
  develop-eggs/
 
123
  *.sage.py
124
 
125
  # Environments
126
+ pdfsensevenv/
127
  .env
128
  .venv
129
  env/
app.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Importing libraries
import streamlit as st
from langchain.chains.history_aware_retriever import create_history_aware_retriever
from langchain.chains.retrieval import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.vectorstores import FAISS
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_groq import ChatGroq
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.output_parsers import StrOutputParser
import os
from dotenv import load_dotenv
load_dotenv()

# API and model setting.
# Fix: `os.environ[key] = None` raises TypeError when a variable is missing
# from the .env file, so only export keys that are actually set.
for _key in ('HF_TOKEN', 'GROQ_API_KEY'):
    _val = os.getenv(_key)
    if _val is not None:
        os.environ[_key] = _val
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
24
+
25
# Streamlit UI: page header and global inputs
st.title("𝖯𝖣π–₯π–²π–Ύπ—‡π—Œπ–Ύ : 𝖯𝖣π–₯ π–°π—Žπ–Ύπ—Œπ—π—‚π—ˆπ—‡ 𝖺𝗇𝖽 π– π—‡π—Œπ—π–Ύπ—‹π—‚π—‡π—€ 𝗐𝗂𝗍𝗁 π—Œπ–Ύπ—Œπ—Œπ—‚π—ˆπ—‡ 𝖼𝗁𝖺𝗍 π—π—‚π—Œπ—π—ˆπ—‹π—’")
st.write("upload pdfs and ask questions related to pdfs")

llm = ChatGroq(model="Gemma2-9b-It")
session_id = st.text_input("Session id", value="common_session")

# Chat histories survive Streamlit reruns by living in session_state,
# keyed by session id; create the container on first run only.
if 'store' not in st.session_state:
    st.session_state.store = {}
34
+
35
# Upload files and documents loading
uploaded_files = st.file_uploader("Drop the pdf files here", type="pdf", accept_multiple_files=True)
if uploaded_files:
    documents = []
    # PyPDFLoader needs a filesystem path, so each upload is spooled to a
    # scratch file. (Fixes: placeholder-free f-string, unused `file_name`
    # local, and cleanup that was skipped if PDF parsing raised.)
    temppdf = "./temp.pdf"
    for uploaded_file in uploaded_files:
        with open(temppdf, "wb") as file:
            file.write(uploaded_file.getvalue())
        try:
            documents.extend(PyPDFLoader(temppdf).load())
        finally:
            # Always delete the temp file, even when loading fails.
            if os.path.exists(temppdf):
                os.remove(temppdf)
    # Text splitting, embedding, and storing in a FAISS vector index
    # (the original comment said "chromadb", but FAISS is what is used).
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=500)
    splits = text_splitter.split_documents(documents)
    faiss_index = FAISS.from_documents(splits, embeddings)
    retriever = faiss_index.as_retriever()
54
+
55
+ # Prompts
56
+ context_system_prompt=(
57
+ "Given a chat history and latest user question"
58
+ "which might reference context in the chat history, "
59
+ "formulate a standalone question which can be understood "
60
+ "without the chat history. Do Not answer the question, "
61
+ "just reformulate it if needed and otherwise return it as it is"
62
+ )
63
+ context_prompt=ChatPromptTemplate.from_messages([
64
+ ("system",context_system_prompt),
65
+ MessagesPlaceholder("chat_history"),
66
+ ("human","{input}")]
67
+ )
68
+
69
+ history_aware_ret=create_history_aware_retriever(llm,retriever,context_prompt)
70
+
71
+ system_prompt=(
72
+ "You are 'PDFSense' a PDF reading and answering assistant. "
73
+ "Use the following pieces of retrieved context to answer "
74
+ "the question. If you don't know the answer, say that you dont know."
75
+ "Answer the questions nicely."
76
+ "\n\n"
77
+ "{context}"
78
+ )
79
+
80
+ prompt=ChatPromptTemplate.from_messages(
81
+ [
82
+ ("system",system_prompt),
83
+ MessagesPlaceholder("chat_history"),
84
+ ("human","{input}")
85
+ ]
86
+ )
87
+ # Chain for the chatbot
88
+ qa_chain=create_stuff_documents_chain(llm,prompt)
89
+ rag_chain=create_retrieval_chain(history_aware_ret,qa_chain)
90
+
91
+ # Session Id storing in chat history
92
+ def get_session_history(session:str)-> BaseChatMessageHistory:
93
+ if session_id not in st.session_state.store:
94
+ st.session_state.store[session_id]=ChatMessageHistory()
95
+ return st.session_state.store[session_id]
96
+
97
+ # RAG with history
98
+ conversation_rag=RunnableWithMessageHistory(
99
+ rag_chain,
100
+ get_session_history,
101
+ input_messages_key="input",
102
+ history_messages_key="chat_history",
103
+ output_messages_key="answer"
104
+ )
105
+
106
+ user_input=st.text_input("Enter question")
107
+ if user_input:
108
+ session_history=get_session_history(session_id)
109
+ response=conversation_rag.invoke(
110
+ {"input":user_input},
111
+ config={
112
+ "configurable":{"session_id":session_id}
113
+ },
114
+ )
115
+ st.write(st.session_state.store)
116
+ st.write("Assistant:",response['answer'])
117
+ st.write("Chat History",session_history.messages)
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ipykernel
2
+ streamlit
3
+ faiss-cpu
4
+ langchain
5
+ langchain_huggingface
6
+ langchain_groq
7
+ gradio
8
+ typing-extensions
9
+ python-dotenv
10
+ langchain_community
11
+ pypdf
12
+ pymupdf
13
+ langchain-text-splitters