DrishtiSharma committed on
Commit 87b256d · verified · 1 Parent(s): 78b3aca

Create interim.py

Files changed (1):
  1. interim.py +186 -0
interim.py ADDED
@@ -0,0 +1,186 @@
+ import os
+ import requests
+ import streamlit as st
+ from langchain.chains import LLMChain
+ from langchain.prompts import PromptTemplate
+ from langchain_groq import ChatGroq
+ from langchain_community.document_loaders import PDFPlumberLoader
+ from langchain_experimental.text_splitter import SemanticChunker
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_chroma import Chroma
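+ # Dependencies (assumed package set): streamlit, requests, langchain,
+ # langchain-community, langchain-groq, langchain-experimental,
+ # langchain-huggingface, langchain-chroma, pdfplumber, sentence-transformers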
+
+ # Set API keys
+ os.environ["GROQ_API_KEY"] = st.secrets.get("GROQ_API_KEY", "")
+
+ # Load LLM models
+ llm_judge = ChatGroq(model="deepseek-r1-distill-llama-70b")
+ rag_llm = ChatGroq(model="mixtral-8x7b-32768")
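+ # Two models: a reasoning-heavy judge for scoring and filtering contexts, and a
+ # faster model for final answer generation. Note: model availability on Groq
+ # changes over time; swap in any currently supported chat models if these fail.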
+
+ st.title("❓ PDF Q&A with a Context-Relevancy Judge")
+
+ # Step 1: Choose PDF Source
+ # Initialize pdf_path
+ pdf_path = None
+ pdf_source = st.radio("Upload or provide a link to a PDF:", ["Upload a PDF file", "Enter a PDF URL"], index=0)
+
+ if pdf_source == "Upload a PDF file":
+     uploaded_file = st.file_uploader("Upload your PDF file", type="pdf")
+     if uploaded_file:
+         with open("temp.pdf", "wb") as f:
+             f.write(uploaded_file.getbuffer())
+         pdf_path = "temp.pdf"
+
+ elif pdf_source == "Enter a PDF URL":
+     pdf_url = st.text_input("Enter PDF URL:")
+     if pdf_url:
+         with st.spinner("Downloading PDF..."):
+             try:
+                 response = requests.get(pdf_url, timeout=30)
+                 if response.status_code == 200:
+                     with open("temp.pdf", "wb") as f:
+                         f.write(response.content)
+                     pdf_path = "temp.pdf"
+                     st.success("✅ PDF Downloaded Successfully!")
+                 else:
+                     st.error("❌ Failed to download PDF. Check the URL.")
+                     pdf_path = None
+             except Exception as e:
+                 st.error(f"Error downloading PDF: {e}")
+                 pdf_path = None
+     else:
+         pdf_path = None
+
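+ # Note: both branches write to a fixed local path ("temp.pdf"), so concurrent
+ # sessions on a shared host would overwrite each other's uploads.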
+ # Step 2: Process PDF
+ if pdf_path:
+     with st.spinner("Loading PDF..."):
+         loader = PDFPlumberLoader(pdf_path)
+         docs = loader.load()
+
+     st.success(f"✅ **PDF Loaded!** Total Pages: {len(docs)}")
+
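+     # PDFPlumberLoader yields one Document per page, so len(docs) above is the
+     # page count; SemanticChunker below re-splits pages at embedding-similarity
+     # breakpoints rather than at fixed character counts.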
+     # Step 3: Chunking
+     with st.spinner("Chunking the document..."):
+         model_name = "nomic-ai/modernbert-embed-base"
+         embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={'device': 'cpu'})
+
+         text_splitter = SemanticChunker(embedding_model)
+         documents = text_splitter.split_documents(docs)
+
+     st.success(f"✅ **Document Chunked!** Total Chunks: {len(documents)}")
+
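+     # The same embedding model is reused below for indexing, which keeps the
+     # chunking-time and query-time vector spaces consistent.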
+     # Step 4: Setup Vectorstore
+     with st.spinner("Creating vector store..."):
+         vector_store = Chroma(
+             collection_name="deepseek_collection",
+             collection_metadata={"hnsw:space": "cosine"},
+             embedding_function=embedding_model
+         )
+         vector_store.add_documents(documents)
+
+     st.success("✅ **Vector Store Created!**")
+
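+     # No persist_directory is set, so the Chroma collection lives in memory and
+     # is rebuilt on every Streamlit rerun; "hnsw:space": "cosine" selects cosine
+     # distance for similarity search.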
+     # Step 5: Query Input
+     query = st.text_input("🔍 Enter a Query:")
+     if query:
+         with st.spinner("Retrieving relevant contexts..."):
+             retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 5})
+             contexts = retriever.invoke(query)
+             context_texts = [doc.page_content for doc in contexts]
+
+         st.success(f"✅ **Retrieved {len(context_texts)} Contexts!**")
+         for i, text in enumerate(context_texts, 1):
+             st.write(f"**Context {i}:** {text[:500]}...")
+
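+         # search_kwargs={"k": 5} caps retrieval at the five nearest chunks; the
+         # preview above truncates each chunk to its first 500 characters.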
+         # Step 6: Context Relevancy Checker
+         with st.spinner("Evaluating context relevancy..."):
+             relevancy_prompt = PromptTemplate(
+                 input_variables=["retriever_query", "context"],
+                 template="""You are an expert judge. Assign each context a relevancy score for answering the query: 1 if relevant, 0 if not.
+
+ CONTEXT LIST:
+ {context}
+
+ QUERY:
+ {retriever_query}
+
+ RESPONSE (JSON):
+ [{{"content": 1, "score": <0 or 1>, "reasoning": "<explanation>"}},
+  {{"content": 2, "score": <0 or 1>, "reasoning": "<explanation>"}},
+  ...]"""
+             )
+             context_relevancy_chain = LLMChain(llm=llm_judge, prompt=relevancy_prompt, output_key="relevancy_response")
+             relevancy_response = context_relevancy_chain.invoke({"context": context_texts, "retriever_query": query})
+
+         st.success("✅ **Context Relevancy Evaluated!**")
+         st.json(relevancy_response['relevancy_response'])
+
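+         # Caveat: LLMChain returns plain text; a reasoning model such as
+         # DeepSeek-R1 may wrap its answer in <think> traces, so the "JSON" here
+         # is not guaranteed to parse and is passed downstream as text.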
+         # Step 7: Selecting Relevant Contexts
+         with st.spinner("Selecting the most relevant contexts..."):
+             relevant_prompt = PromptTemplate(
+                 input_variables=["relevancy_response"],
+                 template="""Extract contexts with score 1 from the relevancy response.
+
+ RELEVANCY RESPONSE:
+ {relevancy_response}
+
+ RESPONSE (JSON):
+ [{{"content": <content number>}}]
+ """
+             )
+             pick_relevant_context_chain = LLMChain(llm=llm_judge, prompt=relevant_prompt, output_key="context_number")
+             relevant_response = pick_relevant_context_chain.invoke({"relevancy_response": relevancy_response['relevancy_response']})
+
+         st.success("✅ **Relevant Contexts Selected!**")
+         st.json(relevant_response['context_number'])
+
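+         # This selection step is itself an LLM call over Step 6's text output;
+         # if the judge emitted valid JSON, filtering the scores with json.loads()
+         # in Python would be cheaper and deterministic.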
+         # Step 8: Retrieving Context for Response Generation
+         with st.spinner("Retrieving final context..."):
+             context_prompt = PromptTemplate(
+                 input_variables=["context_number", "context"],
+                 template="""Extract actual content for the selected context numbers.
+
+ CONTEXT NUMBERS:
+ {context_number}
+
+ CONTENT LIST:
+ {context}
+
+ RESPONSE (JSON):
+ [{{"context_number": <content number>, "relevant_content": "<actual context>"}}]
+ """
+             )
+             relevant_contexts_chain = LLMChain(llm=llm_judge, prompt=context_prompt, output_key="relevant_contexts")
+             final_contexts = relevant_contexts_chain.invoke({"context_number": relevant_response['context_number'], "context": context_texts})
+
+         st.success("✅ **Final Contexts Retrieved!**")
+         st.json(final_contexts['relevant_contexts'])
+
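+         # Note: asking the model to copy chunk text back can introduce
+         # paraphrasing; indexing into context_texts by the selected numbers
+         # would reproduce the chunks exactly.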
+         # Step 9: Generate Final Response
+         with st.spinner("Generating the final answer..."):
+             rag_prompt = PromptTemplate(
+                 input_variables=["query", "context"],
+                 template="""Generate a clear, fact-based response based on the context.
+
+ QUERY:
+ {query}
+
+ CONTEXT:
+ {context}
+
+ ANSWER:
+ """
+             )
+             response_chain = LLMChain(llm=rag_llm, prompt=rag_prompt, output_key="final_response")
+             final_response = response_chain.invoke({"query": query, "context": final_contexts['relevant_contexts']})
+
+         st.success("✅ **Final Response Generated!**")
+         st.success(final_response['final_response'])
+
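+         # The answer model sees only the judge-filtered contexts from Step 8,
+         # not the raw top-5 retrieval.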
+         # Step 10: Display Workflow Breakdown
+         st.write("🔍 **Workflow Breakdown:**")
+         st.json({
+             "Context Relevancy Evaluation": relevancy_response["relevancy_response"],
+             "Relevant Contexts": relevant_response["context_number"],
+             "Extracted Contexts": final_contexts["relevant_contexts"],
+             "Final Answer": final_response["final_response"]
+         })
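+ # Usage (assumed setup): put GROQ_API_KEY in .streamlit/secrets.toml, then run
+ # `streamlit run interim.py`.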