ankitv42 committed · verified
Commit 2c98ffc · 1 Parent(s): 91781c9

Update app.py

Files changed (1)
  1. app.py +86 -68
app.py CHANGED
@@ -1,68 +1,86 @@
- import streamlit as st
- import os
- import tempfile
- from langchain_community.vectorstores import FAISS
- from langchain_groq import ChatGroq
- from langchain_community.embeddings import HuggingFaceBgeEmbeddings
- from langchain.text_splitter import RecursiveCharacterTextSplitter
- from langchain_core.runnables import RunnablePassthrough
- from langchain.document_loaders import PyPDFLoader
- from langchain import hub
-
- # Set API key (replace with your actual key)
- os.environ["GROQ_API_KEY"] = "your_groq_api_key"
-
- # Streamlit UI
- st.title("📄 PDF Chatbot with RAG")
- st.write("Upload a PDF and ask questions!")
-
- # File uploader
- uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
-
- if uploaded_file:
-     # Save uploaded PDF temporarily
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
-         temp_file.write(uploaded_file.read())
-         temp_file_path = temp_file.name
-
-     # Load and process PDF
-     loader = PyPDFLoader(temp_file_path)
-     docs = loader.load()
-
-     # Initialize LLM and Embeddings
-     llm = ChatGroq(model="llama3-8b-8192")
-     model_name = "BAAI/bge-small-en"
-     hf_embeddings = HuggingFaceBgeEmbeddings(
-         model_name=model_name,
-         model_kwargs={'device': 'cpu'},
-         encode_kwargs={'normalize_embeddings': True}
-     )
-
-     # Split text
-     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
-     splits = text_splitter.split_documents(docs)
-
-     # Create FAISS vector store
-     vectorstore = FAISS.from_documents(documents=splits, embedding=hf_embeddings)
-     retriever = vectorstore.as_retriever()
-
-     # Load RAG prompt
-     prompt = hub.pull("rlm/rag-prompt")
-
-     def format_docs(docs):
-         return "\n\n".join(doc.page_content for doc in docs)
-
-     # RAG Chain
-     rag_chain = (
-         {"context": retriever | format_docs, "question": RunnablePassthrough()}
-         | prompt
-         | llm
-     )
-
-     # User Query
-     user_query = st.text_input("Ask a question from the PDF:")
-
-     if user_query:
-         response = rag_chain.invoke(user_query)
-         st.write("### 🤖 AI Response:")
-         st.write(response)
+ import os
+ import tempfile
+ import gradio as gr
+ from langchain_community.vectorstores import FAISS
+ from langchain_groq import ChatGroq
+ from langchain_community.embeddings import HuggingFaceBgeEmbeddings
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_core.runnables import RunnablePassthrough
+ from langchain.document_loaders import PyPDFLoader
+ from langchain import hub
+
+ # Set API key (replace with your actual key)
+ os.environ["GROQ_API_KEY"] = "your_groq_api_key"
+
+ # Initialize LLM and Embeddings
+ llm = ChatGroq(model="llama3-8b-8192")
+ model_name = "BAAI/bge-small-en"
+ hf_embeddings = HuggingFaceBgeEmbeddings(
+     model_name=model_name,
+     model_kwargs={'device': 'cpu'},
+     encode_kwargs={'normalize_embeddings': True}
+ )
+
+ # Function to process PDF
+ def process_pdf(file):
+     global rag_chain  # built here, reused by ask_question
+     if file is None:
+         return "Please upload a PDF file."
+
+     # Save PDF temporarily (gr.File(type="binary") passes raw bytes)
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
+         temp_file.write(file)
+         temp_file_path = temp_file.name
+
+     # Load and process PDF
+     loader = PyPDFLoader(temp_file_path)
+     docs = loader.load()
+
+     # Split text
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+     splits = text_splitter.split_documents(docs)
+
+     # Create FAISS vector store
+     vectorstore = FAISS.from_documents(documents=splits, embedding=hf_embeddings)
+     retriever = vectorstore.as_retriever()
+
+     # Load RAG prompt
+     prompt = hub.pull("rlm/rag-prompt")
+
+     def format_docs(docs):
+         return "\n\n".join(doc.page_content for doc in docs)
+
+     # RAG Chain
+     rag_chain = (
+         {"context": retriever | format_docs, "question": RunnablePassthrough()}
+         | prompt
+         | llm
+     )
+
+     return "PDF processed successfully! Now ask questions."
+
+ # Function to answer queries
+ def ask_question(query):
+     if "rag_chain" not in globals():
+         return "Please upload and process a PDF first."
+
+     response = rag_chain.invoke(query)
+     return response.content  # ChatGroq returns an AIMessage; show its text
+
+ # Gradio UI
+ with gr.Blocks() as demo:
+     gr.Markdown("# 📄 PDF Chatbot with RAG")
+     gr.Markdown("Upload a PDF and ask questions!")
+
+     pdf_input = gr.File(label="Upload PDF", type="binary")
+     process_button = gr.Button("Process PDF")
+     output_message = gr.Textbox(label="Status", interactive=False)
+
+     query_input = gr.Textbox(label="Ask a Question")
+     submit_button = gr.Button("Submit")
+     response_output = gr.Textbox(label="AI Response")
+
+     process_button.click(process_pdf, inputs=pdf_input, outputs=output_message)
+     submit_button.click(ask_question, inputs=query_input, outputs=response_output)
+
+ demo.launch()
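
Two follow-up notes on the new version. First, the key: rather than assigning GROQ_API_KEY in app.py, it is safer to read it from the environment; on Hugging Face Spaces that means adding it as a repository secret. A minimal sketch (the secret name GROQ_API_KEY is an assumption, though it is the variable langchain_groq looks for by default):

import os

# Fail fast if the key was not provided via the environment
# (e.g. a Spaces secret, or `export GROQ_API_KEY=...` locally).
if "GROQ_API_KEY" not in os.environ:
    raise RuntimeError("GROQ_API_KEY is not set; add it as a Spaces secret or export it.")

With this in place, ChatGroq(model="llama3-8b-8192") needs no explicit key argument, since langchain_groq picks up GROQ_API_KEY from the environment.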
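
Second, the module-level rag_chain means every visitor to the Space shares, and can overwrite, the chain built from the last uploaded PDF, and the delete=False temp file is never removed. Gradio's usual answer to the first issue is per-session state via gr.State. A minimal sketch under that assumption, with the RAG setup elided into a hypothetical build_chain helper standing in for the body of process_pdf:

import gradio as gr

def build_chain(file_bytes):
    # Placeholder for the real work: save temp PDF, load, split,
    # embed, build FAISS retriever, compose prompt | llm.
    return lambda q: f"answer to {q!r}"

def process_pdf(file, chain):
    if file is None:
        return "Please upload a PDF file.", chain
    return "PDF processed successfully!", build_chain(file)

def ask_question(query, chain):
    if chain is None:
        return "Please upload and process a PDF first."
    return chain(query)

with gr.Blocks() as demo:
    chain_state = gr.State(None)  # one chain per browser session
    pdf_input = gr.File(label="Upload PDF", type="binary")
    process_button = gr.Button("Process PDF")
    status = gr.Textbox(label="Status", interactive=False)
    query_input = gr.Textbox(label="Ask a Question")
    submit_button = gr.Button("Submit")
    answer = gr.Textbox(label="AI Response")

    process_button.click(process_pdf, inputs=[pdf_input, chain_state],
                         outputs=[status, chain_state])
    submit_button.click(ask_question, inputs=[query_input, chain_state],
                        outputs=answer)

demo.launch()

Because gr.State is scoped to the browser session, concurrent users each get their own chain instead of racing on a global.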