tayy786 committed on
Commit
4590062
·
verified ·
1 Parent(s): 6fcf56d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +194 -0
app.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
import faiss
import numpy as np
import gradio as gr
from pypdf import PdfReader
from sentence_transformers import SentenceTransformer
from groq import Groq

# -----------------------------
# Initialize Models
# -----------------------------
# Sentence-embedding model used for both document chunks and queries
# (loaded once at import time; create_index/retrieve reuse it).
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Safely load API key
# NOTE(review): the secret is read from the env var "Rag", while ask_groq's
# error message says "GROQ_API_KEY not set" — confirm "Rag" is really the
# configured Hugging Face Space secret name.
GROQ_API_KEY = os.getenv("Rag")
# client stays None when the key is missing; ask_groq handles that case.
client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None

# -----------------------------
# Global Storage
# -----------------------------
index = None      # FAISS index built by create_index(); None until a PDF is processed
documents = []    # text chunks aligned 1:1 with the vectors stored in `index`
23
+
24
+ # -----------------------------
25
+ # PDF Processing
26
+ # -----------------------------
def read_pdf(file):
    """Extract all text from an uploaded PDF.

    Args:
        file: Gradio upload object; ``file.name`` is the temp-file path on disk.

    Returns:
        The concatenated text of every page, or a string starting with
        "Error reading PDF:" on failure (process_pdf checks for this).
    """
    try:
        # .name gives the path Gradio wrote the upload to (needed on HF Spaces).
        reader = PdfReader(file.name)
        # extract_text() can return None for image-only pages. Call it ONCE per
        # page (the original called it twice) and join instead of += (linear,
        # not quadratic, string building).
        return "".join(
            text for text in (page.extract_text() for page in reader.pages) if text
        )
    except Exception as e:
        # Failure is reported in-band as a string; process_pdf surfaces it.
        return f"Error reading PDF: {str(e)}"
37
+
38
+
def chunk_text(text, chunk_size=500, overlap=100):
    """Split *text* into overlapping fixed-size chunks.

    Args:
        text: The document text to split.
        chunk_size: Maximum characters per chunk.
        overlap: Characters shared between consecutive chunks.

    Returns:
        List of chunk strings; empty list for empty input. The final chunk
        may be shorter than ``chunk_size``.

    Raises:
        ValueError: If ``overlap >= chunk_size`` (the original looped forever
            in that case because the start position never advanced).
    """
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")
    step = chunk_size - overlap  # how far each new chunk's start advances
    return [text[start:start + chunk_size] for start in range(0, len(text), step)]
49
+
50
+
51
+ # -----------------------------
52
+ # Create FAISS Index
53
+ # -----------------------------
54
+ def create_index(chunks):
55
+ global index, documents
56
+
57
+ documents = chunks
58
+ embeddings = embedder.encode(chunks)
59
+
60
+ embeddings = np.array(embeddings).astype("float32")
61
+
62
+ dimension = embeddings.shape[1]
63
+ index = faiss.IndexFlatL2(dimension)
64
+ index.add(embeddings)
65
+
66
+
67
+ # -----------------------------
68
+ # Retrieval
69
+ # -----------------------------
def retrieve(query, k=3, threshold=1.2):
    """Find the chunks most similar to *query* in the global FAISS index.

    Args:
        query: The user's question.
        k: Maximum number of neighbours to fetch.
        threshold: Maximum L2 distance for a chunk to count as relevant.

    Returns:
        Tuple ``(relevant_chunks, confidence)`` where confidence is
        "High"/"Medium"/"Low" based on the mean distance, or None when the
        index is missing or nothing passed the threshold.
    """
    global index

    if index is None:
        return [], None

    query_embedding = np.asarray(embedder.encode([query]), dtype="float32")
    distances, indices = index.search(query_embedding, k)

    relevant_chunks = []
    valid_distances = []

    for i, dist in zip(indices[0], distances[0]):
        # BUGFIX: FAISS pads `indices` with -1 when fewer than k vectors
        # exist. The original `i < len(documents)` let -1 through, which
        # silently returned documents[-1]. Require a non-negative index.
        if 0 <= i < len(documents) and dist < threshold:
            relevant_chunks.append(documents[i])
            valid_distances.append(dist)

    # Map the mean distance of accepted hits to a coarse confidence label.
    confidence = None
    if valid_distances:
        avg = np.mean(valid_distances)
        if avg < 0.5:
            confidence = "High"
        elif avg < 1.0:
            confidence = "Medium"
        else:
            confidence = "Low"

    return relevant_chunks, confidence
101
+
102
+
103
+ # -----------------------------
104
+ # LLM (Groq)
105
+ # -----------------------------
def ask_groq(context_chunks, question):
    """Ask Groq's LLM to answer *question* grounded in *context_chunks*.

    Returns the model's answer text, or an in-band error string when the
    API key is missing or the API call fails.
    """
    if client is None:
        return "Error: GROQ_API_KEY not set in Hugging Face Secrets."

    joined_context = "\n".join(context_chunks)

    # Prompt instructs the model to distinguish exact / related / absent answers.
    prompt = f"""
You are an intelligent assistant.

Rules:
1. If answer is clearly in context → answer normally.
2. If related but not exact → say:
"This is not explicitly mentioned in the document, but based on related context..."
3. If irrelevant → say:
"The document does not contain information related to this question."

Context:
{joined_context}

Question:
{question}
"""

    try:
        completion = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[{"role": "user", "content": prompt}],
        )
    except Exception as e:
        return f"Groq API Error: {str(e)}"

    return completion.choices[0].message.content
137
+
138
+
139
+ # -----------------------------
140
+ # Main Functions
141
+ # -----------------------------
def process_pdf(file):
    """Read an uploaded PDF, chunk it, and build the retrieval index.

    Args:
        file: Gradio upload object, or None when nothing was uploaded.

    Returns:
        A human-readable status string for the Status textbox.
    """
    if file is None:
        return "Please upload a PDF."

    text = read_pdf(file)

    # read_pdf reports failures in-band with a fixed prefix. The original
    # tested `"Error" in text`, which misfired on any PDF whose content
    # happens to contain the word "Error" — match the exact prefix instead.
    if text.startswith("Error reading PDF:"):
        return text
    if not text:
        # Previously returned "" (a blank status box); say what happened.
        return "Error reading PDF: no extractable text found."

    chunks = chunk_text(text)
    create_index(chunks)

    return f"✅ PDF processed! Chunks: {len(chunks)}"
155
+
156
+
def answer_question(question):
    """Answer *question* against the currently indexed PDF.

    Retrieves the most relevant chunks, asks the LLM, and prefixes the
    answer with a confidence label when retrieval produced one.
    """
    if index is None:
        return "Please upload and process a PDF first."

    chunks, confidence = retrieve(question)

    if not chunks:
        return "The document does not contain information related to this question."

    response = ask_groq(chunks, question)
    return f"(Confidence: {confidence})\n\n{response}" if confidence else response
172
+
173
+
# -----------------------------
# Gradio UI
# -----------------------------
with gr.Blocks() as demo:
    gr.Markdown("## 📄 RAG PDF Q&A (Groq + FAISS)")

    # Upload / indexing controls: clicking the button runs process_pdf
    # and writes its status string into `status`.
    file_input = gr.File(label="Upload PDF")
    upload_btn = gr.Button("Process PDF")
    status = gr.Textbox(label="Status")

    # Q&A controls: pressing Enter in the question box triggers the query.
    question = gr.Textbox(label="Ask a question")
    answer = gr.Textbox(label="Answer")

    upload_btn.click(process_pdf, inputs=file_input, outputs=status)
    question.submit(answer_question, inputs=question, outputs=answer)
189
+
# -----------------------------
# Launch
# -----------------------------
if __name__ == "__main__":
    # Bind to all interfaces on port 7860 — the port Hugging Face Spaces expects.
    demo.launch(server_name="0.0.0.0", server_port=7860)