Prathamesh1420 committed
Commit c9e2b1b · verified · 1 Parent(s): 9e122a0

Update app.py

Files changed (1)
  1. app.py +71 -186
app.py CHANGED
@@ -1,4 +1,3 @@
-
 import os
 import gradio as gr
 import requests
@@ -37,7 +36,7 @@ load_dotenv()
 PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY", "")
 MLFLOW_TRACKING_URI = os.environ.get("MLFLOW_TRACKING_URI", "http://localhost:5000")
 GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
-LITSERVE_ENDPOINT = os.environ.get("LITSERVE_ENDPOINT", "https://8001-01k2h9d9mervcmgfn66ybkpwvq.cloudspaces.litng.ai/predict")
+LITSERVE_ENDPOINT = os.environ.get("LITSERVE_ENDPOINT", "http://localhost:8000/predict")

 # DagsHub & MLflow Setup (guarded)
 try:
@@ -70,31 +69,29 @@ Question: {question}
 Answer:
 """, "artifacts/prompt_template.txt")

-# ----------- 1. Custom LLM for LitServe endpoint (Lightning AI) -----------
+# ----------- 1. Custom LLM for LitServe endpoint -----------
 class LitServeLLM(LLM):
     endpoint_url: str

-    @mlflow.trace
     def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
         payload = {"prompt": prompt}
-        with mlflow.start_span("lit_serve_request"):
-            start_time = time.time()
-            response = requests.post(self.endpoint_url, json=payload)
-            latency = time.time() - start_time
-            mlflow.log_metric("lit_serve_latency", latency)
-            if response.status_code == 200:
-                data = response.json()
-                mlflow.log_metric("response_tokens", len(data.get("response", "").split()))
-                return data.get("response", "").strip()
-            else:
-                mlflow.log_metric("request_errors", 1)
-                error_info = {
-                    "status_code": response.status_code,
-                    "error": response.text,
-                    "timestamp": datetime.now().isoformat()
-                }
-                mlflow.log_dict(error_info, "artifacts/error_log.json")
-                raise ValueError(f"Request failed: {response.status_code}")
+        start_time = time.time()
+        response = requests.post(self.endpoint_url, json=payload)
+        latency = time.time() - start_time
+        mlflow.log_metric("lit_serve_latency", latency)
+        if response.status_code == 200:
+            data = response.json()
+            mlflow.log_metric("response_tokens", len(data.get("response", "").split()))
+            return data.get("response", "").strip()
+        else:
+            mlflow.log_metric("request_errors", 1)
+            error_info = {
+                "status_code": response.status_code,
+                "error": response.text,
+                "timestamp": datetime.now().isoformat()
+            }
+            mlflow.log_dict(error_info, "artifacts/error_log.json")
+            raise ValueError(f"Request failed: {response.status_code}")

     @property
     def _identifying_params(self) -> Mapping[str, Any]:
@@ -105,9 +102,7 @@ class LitServeLLM(LLM):
         return "litserve_llm"

 # ----------- 2. Pinecone Connection -----------
-@mlflow.trace
 def init_pinecone():
-    PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
     pc = Pinecone(api_key=PINECONE_API_KEY)
     return pc.Index("rag-granite-index")

@@ -119,33 +114,21 @@ except Exception:
 # ----------- 3. Embedding Model -----------
 embeddings_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

-# ----------- 4. Context Retrieval with Tracing -----------
-@mlflow.trace
+# ----------- 4. Context Retrieval -----------
 def get_retrieved_context(query: str, top_k=3):
-    with mlflow.start_span("embedding_generation"):
-        start_time = time.time()
-        query_embedding = embeddings_model.embed_query(query)
-        mlflow.log_metric("embedding_latency", time.time() - start_time)
-
+    query_embedding = embeddings_model.embed_query(query)
     if index is None:
         return ""
-
-    with mlflow.start_span("pinecone_query"):
-        start_time = time.time()
-        results = index.query(
-            namespace="rag-ns",
-            vector=query_embedding,
-            top_k=top_k,
-            include_metadata=True
-        )
-        mlflow.log_metric("pinecone_latency", time.time() - start_time)
-        mlflow.log_metric("retrieved_chunks", len(results['matches']))
-
+    results = index.query(
+        namespace="rag-ns",
+        vector=query_embedding,
+        top_k=top_k,
+        include_metadata=True
+    )
     context_parts = [match['metadata']['text'] for match in results['matches']]
     return "\n".join(context_parts)

-
-# ----------- 5. LLM Chain Setup (Lightning AI generator) -----------
+# ----------- 5. LLM Chain Setup -----------
 model = LitServeLLM(endpoint_url=LITSERVE_ENDPOINT)

 prompt = PromptTemplate(
@@ -162,42 +145,24 @@ Answer:

 llm_chain = LLMChain(llm=model, prompt=prompt)

-# ----------- 6. RAG Pipeline with Full Tracing (uses Lightning AI) -----------
-@mlflow.trace
+# ----------- 6. RAG Pipeline -----------
 def rag_pipeline(question):
     try:
-        with mlflow.start_run(run_name=f"Query-{datetime.now().strftime('%H%M%S')}", nested=True):
-            mlflow.log_param("user_question", question)
-            retrieved_context = get_retrieved_context(question)
-            mlflow.log_text(retrieved_context, "artifacts/retrieved_context.txt")
-
-            start_time = time.time()
-            response_obj = llm_chain.invoke({
-                "context": retrieved_context,
-                "question": question
-            })
-            response = response_obj.get("text") if isinstance(response_obj, dict) else getattr(response_obj, "text", str(response_obj))
-            response = response.strip()
-
-            if "Answer:" in response:
-                response = response.split("Answer:", 1)[-1].strip()
-
-            mlflow.log_metric("response_latency", time.time() - start_time)
-            mlflow.log_metric("response_length", len(response))
-            mlflow.log_text(response, "artifacts/response.txt")
-
-            return response
+        retrieved_context = get_retrieved_context(question)
+        mlflow.log_text(retrieved_context, "artifacts/retrieved_context.txt")
+        response_obj = llm_chain.invoke({"context": retrieved_context, "question": question})
+        response = response_obj.get("text") if isinstance(response_obj, dict) else getattr(response_obj, "text", str(response_obj))
+        response = response.strip()
+        if "Answer:" in response:
+            response = response.split("Answer:", 1)[-1].strip()
+        mlflow.log_text(response, "artifacts/response.txt")
+        return response
     except Exception as e:
-        mlflow.log_metric("pipeline_errors", 1)
-        error_info = {
-            "error": str(e),
-            "question": question,
-            "timestamp": datetime.now().isoformat()
-        }
+        error_info = {"error": str(e), "question": question, "timestamp": datetime.now().isoformat()}
         mlflow.log_dict(error_info, "artifacts/pipeline_errors.json")
         return f"Error: {str(e)}"

-# ----------- 7. DeepEval Wrapper(s) and Metrics Integration (Gemini evaluation) -----------
+# ----------- 7. DeepEval Wrappers -----------
 class GoogleVertexAI(DeepEvalBaseLLM):
     def __init__(self, model):
         self.model = model
@@ -214,11 +179,6 @@ class GoogleVertexAI(DeepEvalBaseLLM):
             return res.get('content') or res.get('text') or str(res)
         return str(res)

-    async def a_generate(self, prompt: str) -> str:
-        chat_model = self.load_model()
-        res = await chat_model.ainvoke(prompt)
-        return getattr(res, 'content', str(res))
-
     def get_model_name(self):
         return "Vertex AI Model"

@@ -232,13 +192,10 @@ class LitServeWrapper(DeepEvalBaseLLM):
     def generate(self, prompt: str) -> str:
         return self.lit_llm._call(prompt)

-    async def a_generate(self, prompt: str) -> str:
-        return self.generate(prompt)
-
     def get_model_name(self):
         return "LitServeModel"

-# Custom metric that DOES NOT require expected_output: Length-based utility metric
+# ----------- 8. Custom Metric -----------
 class LengthMetric(BaseMetric):
     def __init__(self, min_tokens: int = 1, max_tokens: int = 200):
         self.min_tokens = min_tokens
@@ -256,9 +213,6 @@ class LengthMetric(BaseMetric):
         self.success = (self.min_tokens <= tokens <= self.max_tokens)
         return self.score

-    async def a_measure(self, test_case: LLMTestCase):
-        return self.measure(test_case)
-
     def is_successful(self):
         return self.success

@@ -266,142 +220,73 @@ class LengthMetric(BaseMetric):
     def name(self):
         return "Length Metric"

-# Helper to get eval model: GEMINI will be used as evaluator by default
+# ----------- 9. Evaluation Setup -----------
 def get_deepeval_model(choice: str = 'gemini'):
     if choice == 'gemini' and ChatGoogleGenerativeAI is not None and GOOGLE_API_KEY:
-        try:
-            genai.configure(api_key=GOOGLE_API_KEY)
-        except Exception:
-            pass
+        genai.configure(api_key=GOOGLE_API_KEY)
         chat_model = ChatGoogleGenerativeAI(model="gemini-pro", google_api_key=GOOGLE_API_KEY)
         return GoogleVertexAI(model=chat_model)
     else:
-        # fallback to litserve wrapper if gemini isn't available
         return LitServeWrapper(lit_llm=model)

-# Function to run Deepeval tests and log to mlflow (only metrics that don't need expected_output)
-@mlflow.trace
 def run_deepeval_tests(test_cases: List[LLMTestCase], eval_model_choice: str = 'gemini'):
     model_wrapper = get_deepeval_model(eval_model_choice)
-
-    # Only metrics that do not require expected output
     answer_relevancy_metric = AnswerRelevancyMetric(threshold=0.5, model=model_wrapper)
     hallucination_metric = HallucinationMetric(threshold=0.5, model=model_wrapper)
     length_metric = LengthMetric(min_tokens=3, max_tokens=200)

     results = []
-    with mlflow.start_run(run_name=f"DeepEval-{datetime.now().strftime('%H%M%S')}", nested=True):
-        for i, tc in enumerate(test_cases):
-            mlflow.log_param(f"tc_{i}_input", tc.input)
-            mlflow.log_param(f"tc_{i}_actual", tc.actual_output)
-            if tc.context:
-                mlflow.log_text("\n".join(tc.context), f"artifacts/tc_{i}_context.txt")
-
-            # Measure metrics
-            answer_relevancy_metric.measure(tc)
-            hallucination_metric.measure(tc)
-            length_metric.measure(tc)
-
-            entry = {
-                "input": tc.input,
-                "actual_output": tc.actual_output,
-                "context": tc.context,
-                "answer_relevancy_score": answer_relevancy_metric.score,
-                "hallucination_score": hallucination_metric.score,
-                "length_score": length_metric.score
-            }
-
-            # Log metrics to mlflow
-            mlflow.log_metric(f"tc_{i}_answer_relevancy", answer_relevancy_metric.score)
-            mlflow.log_metric(f"tc_{i}_hallucination", hallucination_metric.score)
-            mlflow.log_metric(f"tc_{i}_length", length_metric.score)
-
-            results.append(entry)
-
+    for i, tc in enumerate(test_cases):
+        if tc.context:
+            mlflow.log_text("\n".join(tc.context), f"artifacts/tc_{i}_context.txt")
+        answer_relevancy_metric.measure(tc)
+        hallucination_metric.measure(tc)
+        length_metric.measure(tc)
+        entry = {
+            "input": tc.input,
+            "actual_output": tc.actual_output,
+            "context": tc.context,
+            "answer_relevancy_score": answer_relevancy_metric.score,
+            "hallucination_score": hallucination_metric.score,
+            "length_score": length_metric.score
+        }
+        results.append(entry)
     return results

-# ----------- 8. Gradio UI with Evaluation Tab (Auto-generate actual output from Lightning AI) -----------
+# ----------- 10. Gradio App -----------
 with gr.Blocks() as demo:
-    gr.Markdown("# 🛠️ Maintenance AI Assistant + DeepEval (Lightning AI generator, Gemini evaluator)")
+    gr.Markdown("# 🛠️ Maintenance AI Assistant + DeepEval")

     with gr.Tabs():
         with gr.TabItem("Chat (RAG)"):
-            usage_counter = gr.State(value=0)
-            session_start = gr.State(value=datetime.now().isoformat())
-
             question_input = gr.Textbox(label="Ask your maintenance question")
             answer_output = gr.Textbox(label="AI Response")
             ask_button = gr.Button("Get Answer")
-            feedback = gr.Radio(["Helpful", "Not Helpful"], label="Was this response helpful?")
-
-            def track_usage(question, count, session_start, feedback=None):
-                count += 1
-                with mlflow.start_run(run_name=f"User-Interaction-{count}", nested=True):
-                    mlflow.log_param("question", question)
-                    mlflow.log_param("session_start", session_start)
-                    response = rag_pipeline(question)
-                    if feedback:
-                        mlflow.log_param("user_feedback", feedback)
-                        mlflow.log_metric("helpful_responses", 1 if feedback == "Helpful" else 0)
-                    mlflow.log_metric("total_queries", count)
-                return response, count, session_start
-
-            ask_button.click(
-                track_usage,
-                inputs=[question_input, usage_counter, session_start],
-                outputs=[answer_output, usage_counter, session_start]
-            )
-
-            feedback.change(
-                track_usage,
-                inputs=[question_input, usage_counter, session_start, feedback],
-                outputs=[answer_output, usage_counter, session_start]
-            )

-        with gr.TabItem("DeepEval — Model Tests"):
-            gr.Markdown("### Run DeepEval metrics (no expected output needed). Provide input; optionally auto-generate the model response (Lightning AI). Gemini will evaluate by default.")
+            def handle_question(question):
+                return rag_pipeline(question)
+
+            ask_button.click(handle_question, inputs=[question_input], outputs=[answer_output])

+        with gr.TabItem("DeepEval — Model Tests"):
             tc_input = gr.Textbox(label="Test Input (prompt)")
-            tc_actual = gr.Textbox(label="Actual Output (paste model response or leave empty to auto-generate)")
+            tc_actual = gr.Textbox(label="Actual Output (leave empty to auto-generate)")
             tc_context = gr.Textbox(label="Context (optional)")
-
-            auto_generate = gr.Checkbox(label="Auto-generate actual output from RAG (Lightning AI)", value=True)
-            model_choice = gr.Radio(["gemini", "litserve"], value="gemini", label="Evaluation backend (Gemini recommended)")
+            auto_generate = gr.Checkbox(label="Auto-generate actual output", value=True)
+            model_choice = gr.Radio(["gemini", "litserve"], value="gemini", label="Evaluation backend")
             run_button = gr.Button("Run DeepEval")
             eval_output = gr.JSON(label="Evaluation Results")

             def run_single_eval(inp, actual, context, autogen, eval_backend):
-                # If autogen is True, generate actual output via RAG pipeline (Lightning AI)
-                if autogen or (actual is None or actual.strip() == ""):
-                    generated = rag_pipeline(inp)
-                    actual_output = generated
+                if autogen or not actual.strip():
+                    actual_output = rag_pipeline(inp)
                 else:
                     actual_output = actual
-
-                # Log that actual was autogenerated
-                with mlflow.start_run(run_name=f"DE-Run-{datetime.now().strftime('%H%M%S')}", nested=True):
-                    mlflow.log_param("input", inp)
-                    mlflow.log_param("autogenerated_actual", autogen)
-                    if context:
-                        mlflow.log_text(context, "artifacts/eval_context.txt")
-
                 tc = LLMTestCase(input=inp, actual_output=actual_output, expected_output=None, context=[context] if context else None)
                 results = run_deepeval_tests([tc], eval_model_choice=eval_backend)
                 return results

-            run_button.click(
-                run_single_eval,
-                inputs=[tc_input, tc_actual, tc_context, auto_generate, model_choice],
-                outputs=[eval_output]
-            )
+            run_button.click(run_single_eval, inputs=[tc_input, tc_actual, tc_context, auto_generate, model_choice], outputs=[eval_output])

 if __name__ == "__main__":
-    with mlflow.start_run(run_name="Deployment-Info"):
-        mlflow.log_params({
-            "app_version": "1.3.0",
-            "deployment_platform": "Lightning AI / HuggingFace Space",
-            "deployment_time": datetime.now().isoformat(),
-            "code_version": os.getenv("GIT_COMMIT", "dev")
-        })
-
     demo.launch()
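
For reference, below is a minimal sketch of a LitServe server that satisfies the contract LitServeLLM._call assumes after this change: it accepts {"prompt": ...} via POST at /predict on port 8000 (matching the new default LITSERVE_ENDPOINT) and returns {"response": ...}. This sketch is not part of the commit; the class name and the echo generator are placeholders, and the hooks follow LitServe's documented LitAPI/LitServer pattern.

# Illustrative only — not from app.py. Run this locally so the updated
# default LITSERVE_ENDPOINT (http://localhost:8000/predict) resolves.
import litserve as ls

class PromptAPI(ls.LitAPI):  # hypothetical name
    def setup(self, device):
        # Stand-in generator; load a real model here.
        self.generate = lambda prompt: f"(echo) {prompt}"

    def decode_request(self, request):
        # Matches the client payload sent by LitServeLLM._call: {"prompt": "..."}
        return request["prompt"]

    def predict(self, prompt):
        return self.generate(prompt)

    def encode_response(self, output):
        # Matches data.get("response", "") read back by LitServeLLM._call
        return {"response": output}

if __name__ == "__main__":
    ls.LitServer(PromptAPI()).run(port=8000)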