Deevyankar committed on
Commit
b1e2eee
·
verified ·
1 Parent(s): 0f5b3f1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -305
app.py CHANGED
@@ -1,7 +1,6 @@
1
  import os
2
  import re
3
  import json
4
- import html
5
  import pickle
6
  from urllib.parse import quote
7
 
@@ -11,30 +10,20 @@ from rank_bm25 import BM25Okapi
11
  from sentence_transformers import SentenceTransformer
12
  from openai import OpenAI
13
 
14
- # =====================================================
15
- # PATHS
16
- # =====================================================
17
  BUILD_DIR = "brainchat_build"
18
  CHUNKS_PATH = os.path.join(BUILD_DIR, "chunks.pkl")
19
  TOKENS_PATH = os.path.join(BUILD_DIR, "tokenized_chunks.pkl")
20
  EMBED_PATH = os.path.join(BUILD_DIR, "embeddings.npy")
21
  CONFIG_PATH = os.path.join(BUILD_DIR, "config.json")
22
-
23
- # Put your logo file in the Space root with this exact name
24
  LOGO_FILE = "Brain chat-09.png"
25
 
26
- # =====================================================
27
- # GLOBALS
28
- # =====================================================
29
  EMBED_MODEL = None
30
  BM25 = None
31
  CHUNKS = None
32
  EMBEDDINGS = None
33
  OAI = None
34
 
35
- # =====================================================
36
- # HELPERS
37
- # =====================================================
38
  def tokenize(text: str):
39
  return re.findall(r"\w+", text.lower(), flags=re.UNICODE)
40
 
@@ -43,15 +32,9 @@ def ensure_loaded():
43
  global EMBED_MODEL, BM25, CHUNKS, EMBEDDINGS, OAI
44
 
45
  if CHUNKS is None:
46
- missing = []
47
  for path in [CHUNKS_PATH, TOKENS_PATH, EMBED_PATH, CONFIG_PATH]:
48
  if not os.path.exists(path):
49
- missing.append(path)
50
-
51
- if missing:
52
- raise FileNotFoundError(
53
- "Missing build files:\n" + "\n".join(missing)
54
- )
55
 
56
  with open(CHUNKS_PATH, "rb") as f:
57
  CHUNKS = pickle.load(f)
@@ -132,19 +115,6 @@ def choose_quiz_count(user_text: str, selector: str) -> int:
132
  return 3
133
 
134
 
135
- def detect_logo_url():
136
- if os.path.exists(LOGO_FILE):
137
- return f"/gradio_api/file={quote(LOGO_FILE)}"
138
- return None
139
-
140
-
141
- def clean_md(text: str) -> str:
142
- return text.strip()
143
-
144
-
145
- # =====================================================
146
- # PROMPT BUILDERS
147
- # =====================================================
148
  def language_instruction(language_mode: str) -> str:
149
  if language_mode == "English":
150
  return "Answer only in English."
@@ -160,22 +130,11 @@ def language_instruction(language_mode: str) -> str:
160
 
161
  def build_tutor_prompt(mode: str, language_mode: str, question: str, context: str) -> str:
162
  mode_map = {
163
- "Explain": (
164
- "Explain clearly like a friendly tutor. "
165
- "Use simple wording and short headings when useful."
166
- ),
167
- "Detailed": (
168
- "Give a fuller explanation. Include concept, key points, and clinical relevance when supported by context."
169
- ),
170
- "Short Notes": (
171
- "Answer in concise revision-note format using short bullet points."
172
- ),
173
- "Flashcards": (
174
- "Create 6 flashcards in Q/A format using only the provided context."
175
- ),
176
- "Case-Based": (
177
- "Create a short clinical scenario and then explain the concept clearly using the context."
178
- ),
179
  }
180
 
181
  return f"""
@@ -185,8 +144,7 @@ Rules:
185
  - Use only the provided context from the books.
186
  - If the answer is not supported by the context, say exactly:
187
  Not found in the course material.
188
- - Be accurate, calm, and student-friendly.
189
- - Do not invent facts outside the context.
190
  - {language_instruction(language_mode)}
191
 
192
  Teaching style:
@@ -207,7 +165,7 @@ You are BrainChat, an interactive tutor.
207
  Rules:
208
  - Use only the provided context.
209
  - Create exactly {n_questions} quiz questions.
210
- - Questions should be short, clear, and suitable for students.
211
  - Also create a short answer key.
212
  - Return valid JSON only.
213
  - {language_instruction(language_mode)}
@@ -229,48 +187,6 @@ Topic:
229
  """.strip()
230
 
231
 
232
- def build_quiz_evaluation_prompt(language_mode: str, quiz_data: dict, user_answers: str) -> str:
233
- quiz_json = json.dumps(quiz_data, ensure_ascii=False)
234
- return f"""
235
- You are BrainChat, an interactive tutor.
236
-
237
- Evaluate the student's answers fairly using the quiz answer key.
238
- Give:
239
- - total score
240
- - per-question feedback
241
- - one short improvement suggestion
242
-
243
- Rules:
244
- - Be fair to answers that are semantically correct even if wording differs.
245
- - Return valid JSON only.
246
- - {language_instruction(language_mode)}
247
-
248
- Required JSON format:
249
- {{
250
- "score_obtained": 0,
251
- "score_total": 0,
252
- "summary": "short overall feedback",
253
- "results": [
254
- {{
255
- "question": "question text",
256
- "student_answer": "student answer",
257
- "result": "Correct / Partially Correct / Incorrect",
258
- "feedback": "short explanation"
259
- }}
260
- ]
261
- }}
262
-
263
- Quiz data:
264
- {quiz_json}
265
-
266
- Student answers:
267
- {user_answers}
268
- """.strip()
269
-
270
-
271
- # =====================================================
272
- # OPENAI HELPERS
273
- # =====================================================
274
  def chat_text(prompt: str) -> str:
275
  resp = OAI.chat.completions.create(
276
  model="gpt-4o-mini",
@@ -296,172 +212,18 @@ def chat_json(prompt: str) -> dict:
296
  return json.loads(resp.choices[0].message.content)
297
 
298
 
299
- # =====================================================
300
- # MAIN CHAT LOGIC
301
- # =====================================================
302
- def answer_question(message, history, mode, language_mode, quiz_count_mode, show_sources, quiz_state):
303
- if history is None:
304
- history = []
305
- if quiz_state is None:
306
- quiz_state = {
307
- "active": False,
308
- "topic": None,
309
- "quiz_data": None,
310
- "language_mode": "Auto"
311
- }
312
-
313
- if not message or not message.strip():
314
- return history, quiz_state, ""
315
-
316
- try:
317
- ensure_loaded()
318
- except Exception as e:
319
- history.append((message, f"Error: {str(e)}"))
320
- return history, quiz_state, ""
321
-
322
- user_text = message.strip()
323
-
324
- # ---------------------------------------------
325
- # If quiz is already active, evaluate answers
326
- # ---------------------------------------------
327
- if quiz_state.get("active", False):
328
- try:
329
- evaluation_prompt = build_quiz_evaluation_prompt(
330
- quiz_state["language_mode"],
331
- quiz_state["quiz_data"],
332
- user_text
333
- )
334
- evaluation = chat_json(evaluation_prompt)
335
-
336
- lines = []
337
- lines.append(f"**Score:** {evaluation['score_obtained']}/{evaluation['score_total']}")
338
- lines.append("")
339
- lines.append(f"**Overall feedback:** {evaluation['summary']}")
340
- lines.append("")
341
- lines.append("**Question-wise evaluation:**")
342
-
343
- for item in evaluation["results"]:
344
- lines.append("")
345
- lines.append(f"**Q:** {item['question']}")
346
- lines.append(f"**Your answer:** {item['student_answer']}")
347
- lines.append(f"**Result:** {item['result']}")
348
- lines.append(f"**Feedback:** {item['feedback']}")
349
-
350
- final_answer = "\n".join(lines)
351
-
352
- history.append((user_text, final_answer))
353
-
354
- quiz_state = {
355
- "active": False,
356
- "topic": None,
357
- "quiz_data": None,
358
- "language_mode": language_mode
359
- }
360
-
361
- return history, quiz_state, ""
362
-
363
- except Exception as e:
364
- history.append((user_text, f"Error while evaluating quiz: {str(e)}"))
365
- quiz_state["active"] = False
366
- return history, quiz_state, ""
367
-
368
- # ---------------------------------------------
369
- # Normal retrieval
370
- # ---------------------------------------------
371
- records = search_hybrid(user_text, shortlist_k=30, final_k=5)
372
- context = build_context(records)
373
-
374
- # ---------------------------------------------
375
- # Quiz mode
376
- # ---------------------------------------------
377
- if mode == "Quiz Me":
378
- try:
379
- n_questions = choose_quiz_count(user_text, quiz_count_mode)
380
- prompt = build_quiz_generation_prompt(language_mode, user_text, context, n_questions)
381
- quiz_data = chat_json(prompt)
382
-
383
- lines = []
384
- lines.append(f"**{quiz_data.get('title', 'Quiz')}**")
385
- lines.append("")
386
- lines.append("Please answer the following questions in one message.")
387
- lines.append("You can reply in numbered format, for example:")
388
- lines.append("1. ...")
389
- lines.append("2. ...")
390
- lines.append("")
391
- lines.append(f"**Total questions: {len(quiz_data['questions'])}**")
392
- lines.append("")
393
-
394
- for i, q in enumerate(quiz_data["questions"], start=1):
395
- lines.append(f"**Q{i}.** {q['q']}")
396
-
397
- if show_sources:
398
- lines.append("\n---\n**Topic sources used to create the quiz:**")
399
- lines.append(make_sources(records))
400
-
401
- assistant_text = "\n".join(lines)
402
-
403
- history.append((user_text, assistant_text))
404
-
405
- quiz_state = {
406
- "active": True,
407
- "topic": user_text,
408
- "quiz_data": quiz_data,
409
- "language_mode": language_mode
410
- }
411
-
412
- return history, quiz_state, ""
413
-
414
- except Exception as e:
415
- history.append((user_text, f"Error while creating quiz: {str(e)}"))
416
- return history, quiz_state, ""
417
-
418
- # ---------------------------------------------
419
- # Other modes
420
- # ---------------------------------------------
421
- try:
422
- prompt = build_tutor_prompt(mode, language_mode, user_text, context)
423
- answer = chat_text(prompt)
424
-
425
- if show_sources:
426
- answer += "\n\n---\n**Sources used:**\n" + make_sources(records)
427
-
428
- history.append((user_text, clean_md(answer)))
429
- return history, quiz_state, ""
430
-
431
- except Exception as e:
432
- history.append((user_text, f"Error: {str(e)}"))
433
- return history, quiz_state, ""
434
-
435
-
436
- def clear_all():
437
- empty_quiz = {
438
- "active": False,
439
- "topic": None,
440
- "quiz_data": None,
441
- "language_mode": "Auto"
442
- }
443
- return [], empty_quiz, ""
444
 
445
 
446
- # =====================================================
447
- # UI
448
- # =====================================================
449
  def render_header():
450
  logo_url = detect_logo_url()
451
  if logo_url:
452
- logo_html = f"""
453
- <img src="{logo_url}" alt="BrainChat Logo"
454
- style="width:120px;height:120px;object-fit:contain;display:block;margin:0 auto;">
455
- """
456
  else:
457
- logo_html = """
458
- <div style="
459
- width:120px;height:120px;border-radius:50%;
460
- background:#efe85a;display:flex;align-items:center;justify-content:center;
461
- font-weight:700;text-align:center;margin:0 auto;">
462
- BRAIN<br>CHAT
463
- </div>
464
- """
465
 
466
  return f"""
467
  <div class="hero-card">
@@ -476,14 +238,58 @@ def render_header():
476
  """
477
 
478
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
479
  CSS = """
480
  body, .gradio-container {
481
  background: #dcdcdc !important;
482
  font-family: Arial, Helvetica, sans-serif !important;
483
  }
484
- footer {
485
- display: none !important;
486
- }
487
  .hero-card {
488
  max-width: 860px;
489
  margin: 18px auto 14px auto;
@@ -491,9 +297,7 @@ footer {
491
  background: linear-gradient(180deg, #e8c7d4 0%, #a55ca2 48%, #2b0c46 100%);
492
  padding: 22px 22px 18px 22px;
493
  }
494
- .hero-inner {
495
- text-align: center;
496
- }
497
  .hero-title {
498
  color: white;
499
  font-size: 34px;
@@ -506,24 +310,13 @@ footer {
506
  font-size: 16px;
507
  margin-top: 6px;
508
  }
509
- .control-row {
510
- max-width: 860px;
511
- margin: 0 auto 8px auto;
512
- }
513
  """
514
 
515
 
516
  with gr.Blocks(css=CSS) as demo:
517
- quiz_state = gr.State({
518
- "active": False,
519
- "topic": None,
520
- "quiz_data": None,
521
- "language_mode": "Auto"
522
- })
523
-
524
  gr.HTML(render_header())
525
 
526
- with gr.Row(elem_classes="control-row"):
527
  mode = gr.Dropdown(
528
  choices=["Explain", "Detailed", "Short Notes", "Quiz Me", "Flashcards", "Case-Based"],
529
  value="Explain",
@@ -535,7 +328,7 @@ with gr.Blocks(css=CSS) as demo:
535
  label="Answer Language"
536
  )
537
 
538
- with gr.Row(elem_classes="control-row"):
539
  quiz_count_mode = gr.Dropdown(
540
  choices=["Auto", "3", "5", "7"],
541
  value="Auto",
@@ -543,43 +336,23 @@ with gr.Blocks(css=CSS) as demo:
543
  )
544
  show_sources = gr.Checkbox(value=True, label="Show Sources")
545
 
546
- gr.Markdown(
547
- """
548
  **How to use**
549
  - Choose a **Tutor Mode**
550
  - Then type a topic or question
551
  - For **Quiz Me**, type a topic such as: `cranial nerves`
552
- - The system will ask questions, and your **next message will be evaluated automatically**
553
- """
554
- )
555
-
556
- chatbot = gr.Chatbot(height=520)
557
- msg = gr.Textbox(
558
- placeholder="Ask a question or type a topic...",
559
- lines=1,
560
- show_label=False
561
- )
562
-
563
- with gr.Row():
564
- send_btn = gr.Button("Send")
565
- clear_btn = gr.Button("Clear Chat")
566
-
567
- msg.submit(
568
- answer_question,
569
- inputs=[msg, chatbot, mode, language_mode, quiz_count_mode, show_sources, quiz_state],
570
- outputs=[chatbot, quiz_state, msg]
571
- )
572
-
573
- send_btn.click(
574
- answer_question,
575
- inputs=[msg, chatbot, mode, language_mode, quiz_count_mode, show_sources, quiz_state],
576
- outputs=[chatbot, quiz_state, msg]
577
- )
578
-
579
- clear_btn.click(
580
- clear_all,
581
- inputs=[],
582
- outputs=[chatbot, quiz_state, msg]
583
  )
584
 
585
  if __name__ == "__main__":
 
1
  import os
2
  import re
3
  import json
 
4
  import pickle
5
  from urllib.parse import quote
6
 
 
10
  from sentence_transformers import SentenceTransformer
11
  from openai import OpenAI
12
 
 
 
 
13
  BUILD_DIR = "brainchat_build"
14
  CHUNKS_PATH = os.path.join(BUILD_DIR, "chunks.pkl")
15
  TOKENS_PATH = os.path.join(BUILD_DIR, "tokenized_chunks.pkl")
16
  EMBED_PATH = os.path.join(BUILD_DIR, "embeddings.npy")
17
  CONFIG_PATH = os.path.join(BUILD_DIR, "config.json")
 
 
18
  LOGO_FILE = "Brain chat-09.png"
19
 
 
 
 
20
  EMBED_MODEL = None
21
  BM25 = None
22
  CHUNKS = None
23
  EMBEDDINGS = None
24
  OAI = None
25
 
26
+
 
 
27
  def tokenize(text: str):
28
  return re.findall(r"\w+", text.lower(), flags=re.UNICODE)
29
 
 
32
  global EMBED_MODEL, BM25, CHUNKS, EMBEDDINGS, OAI
33
 
34
  if CHUNKS is None:
 
35
  for path in [CHUNKS_PATH, TOKENS_PATH, EMBED_PATH, CONFIG_PATH]:
36
  if not os.path.exists(path):
37
+ raise FileNotFoundError(f"Missing file: {path}")
 
 
 
 
 
38
 
39
  with open(CHUNKS_PATH, "rb") as f:
40
  CHUNKS = pickle.load(f)
 
115
  return 3
116
 
117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  def language_instruction(language_mode: str) -> str:
119
  if language_mode == "English":
120
  return "Answer only in English."
 
130
 
131
  def build_tutor_prompt(mode: str, language_mode: str, question: str, context: str) -> str:
132
  mode_map = {
133
+ "Explain": "Explain clearly like a friendly tutor using simple language.",
134
+ "Detailed": "Give a fuller and more detailed explanation.",
135
+ "Short Notes": "Answer in concise revision-note format using bullets.",
136
+ "Flashcards": "Create 6 flashcards in Q/A format.",
137
+ "Case-Based": "Create a short clinical scenario and explain it clearly."
 
 
 
 
 
 
 
 
 
 
 
138
  }
139
 
140
  return f"""
 
144
  - Use only the provided context from the books.
145
  - If the answer is not supported by the context, say exactly:
146
  Not found in the course material.
147
+ - Be accurate and student-friendly.
 
148
  - {language_instruction(language_mode)}
149
 
150
  Teaching style:
 
165
  Rules:
166
  - Use only the provided context.
167
  - Create exactly {n_questions} quiz questions.
168
+ - Questions should be short and clear.
169
  - Also create a short answer key.
170
  - Return valid JSON only.
171
  - {language_instruction(language_mode)}
 
187
  """.strip()
188
 
189
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  def chat_text(prompt: str) -> str:
191
  resp = OAI.chat.completions.create(
192
  model="gpt-4o-mini",
 
212
  return json.loads(resp.choices[0].message.content)
213
 
214
 
215
+ def detect_logo_url():
216
+ if os.path.exists(LOGO_FILE):
217
+ return f"/gradio_api/file={quote(LOGO_FILE)}"
218
+ return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
 
220
 
 
 
 
221
  def render_header():
222
  logo_url = detect_logo_url()
223
  if logo_url:
224
+ logo_html = f'<img src="{logo_url}" alt="BrainChat Logo" style="width:120px;height:120px;object-fit:contain;display:block;margin:0 auto;">'
 
 
 
225
  else:
226
+ logo_html = '<div style="width:120px;height:120px;border-radius:50%;background:#efe85a;display:flex;align-items:center;justify-content:center;font-weight:700;text-align:center;margin:0 auto;">BRAIN<br>CHAT</div>'
 
 
 
 
 
 
 
227
 
228
  return f"""
229
  <div class="hero-card">
 
238
  """
239
 
240
 
241
+ def answer_question(message, history, mode, language_mode, quiz_count_mode, show_sources):
242
+ if not message or not message.strip():
243
+ return "Please type a topic or question."
244
+
245
+ ensure_loaded()
246
+ user_text = message.strip()
247
+
248
+ records = search_hybrid(user_text, shortlist_k=30, final_k=5)
249
+ context = build_context(records)
250
+
251
+ if mode == "Quiz Me":
252
+ n_questions = choose_quiz_count(user_text, quiz_count_mode)
253
+ prompt = build_quiz_generation_prompt(language_mode, user_text, context, n_questions)
254
+ quiz_data = chat_json(prompt)
255
+
256
+ lines = []
257
+ lines.append(f"**{quiz_data.get('title', 'Quiz')}**")
258
+ lines.append("")
259
+ lines.append(f"**Total questions: {len(quiz_data['questions'])}**")
260
+ lines.append("")
261
+
262
+ for i, q in enumerate(quiz_data["questions"], start=1):
263
+ lines.append(f"**Q{i}.** {q['q']}")
264
+
265
+ lines.append("")
266
+ lines.append("Reply with your answers in one message, for example:")
267
+ lines.append("1. ...")
268
+ lines.append("2. ...")
269
+ lines.append("")
270
+ lines.append("This version generates quiz questions only. Evaluation can be added next.")
271
+
272
+ if show_sources:
273
+ lines.append("\n---\n**Topic sources used to create the quiz:**")
274
+ lines.append(make_sources(records))
275
+
276
+ return "\n".join(lines)
277
+
278
+ prompt = build_tutor_prompt(mode, language_mode, user_text, context)
279
+ answer = chat_text(prompt)
280
+
281
+ if show_sources:
282
+ answer += "\n\n---\n**Sources used:**\n" + make_sources(records)
283
+
284
+ return answer
285
+
286
+
287
  CSS = """
288
  body, .gradio-container {
289
  background: #dcdcdc !important;
290
  font-family: Arial, Helvetica, sans-serif !important;
291
  }
292
+ footer { display: none !important; }
 
 
293
  .hero-card {
294
  max-width: 860px;
295
  margin: 18px auto 14px auto;
 
297
  background: linear-gradient(180deg, #e8c7d4 0%, #a55ca2 48%, #2b0c46 100%);
298
  padding: 22px 22px 18px 22px;
299
  }
300
+ .hero-inner { text-align: center; }
 
 
301
  .hero-title {
302
  color: white;
303
  font-size: 34px;
 
310
  font-size: 16px;
311
  margin-top: 6px;
312
  }
 
 
 
 
313
  """
314
 
315
 
316
  with gr.Blocks(css=CSS) as demo:
 
 
 
 
 
 
 
317
  gr.HTML(render_header())
318
 
319
+ with gr.Row():
320
  mode = gr.Dropdown(
321
  choices=["Explain", "Detailed", "Short Notes", "Quiz Me", "Flashcards", "Case-Based"],
322
  value="Explain",
 
328
  label="Answer Language"
329
  )
330
 
331
+ with gr.Row():
332
  quiz_count_mode = gr.Dropdown(
333
  choices=["Auto", "3", "5", "7"],
334
  value="Auto",
 
336
  )
337
  show_sources = gr.Checkbox(value=True, label="Show Sources")
338
 
339
+ gr.Markdown("""
 
340
  **How to use**
341
  - Choose a **Tutor Mode**
342
  - Then type a topic or question
343
  - For **Quiz Me**, type a topic such as: `cranial nerves`
344
+ - For **Flashcards**, type a topic such as: `hippocampus`
345
+ """)
346
+
347
+ gr.ChatInterface(
348
+ fn=answer_question,
349
+ additional_inputs=[mode, language_mode, quiz_count_mode, show_sources],
350
+ title=None,
351
+ description=None,
352
+ textbox=gr.Textbox(
353
+ placeholder="Ask a question or type a topic...",
354
+ lines=1
355
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
  )
357
 
358
  if __name__ == "__main__":