Spaces:

gladguy
/

SimpleProject

Sleeping

App Files Files Community

gladguy committed on Nov 23, 2025

Commit

23cc7e7

1 Parent(s): 6390e94

Enhance Book Learning with text extraction for accurate analysis

Browse files

Files changed (1) hide show

app.py +63 -49

app.py CHANGED Viewed

@@ -374,14 +374,14 @@ def process_anatomy_query(query: str) -> tuple:
 # Book Learning Mode Functions
 def process_uploaded_book(pdf_file):
     """
-    Process uploaded PDF book and extract first 20 pages with images.
-    Returns (list_of_images, status_message)
     """
     if pdf_file is None:
         return [], "Please upload a PDF file."
     try:
-        extracted_images = []
         # Save uploaded file temporarily
         with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
@@ -390,13 +390,28 @@ def process_uploaded_book(pdf_file):
         try:
             # Convert first 20 pages to images
-            pages = convert_from_path(tmp_path, first_page=1, last_page=20, dpi=150)
-            for i, page in enumerate(pages):
-                extracted_images.append((page, f"Page {i+1}"))
-            status = f"✅ Successfully processed {len(extracted_images)} pages from your anatomy textbook!"
-            return extracted_images, status
         finally:
             # Clean up temp file
@@ -407,7 +422,7 @@ def process_uploaded_book(pdf_file):
         return [], f"⚠️ Error processing PDF: {str(e)}"
-def analyze_book_image(image, page_info):
     """
     Analyze selected image from book using AI to extract anatomical information.
     Returns formatted explanation text.
@@ -416,53 +431,45 @@ def analyze_book_image(image, page_info):
         return "Please select an image from the book."
     try:
-        # For now, we'll use the Hyperbolic API to analyze what might be in the image
-        # In a production version, you'd use vision models or OCR
         headers = {
             "Content-Type": "application/json",
             "Authorization": f"Bearer {HYPERBOLIC_API_KEY}"
         }
         prompt = f"""You are an anatomy professor helping MBBS students learn from their textbook.
-A student is looking at {page_info} which contains an anatomical diagram or illustration.
-Provide a comprehensive explanation that would typically accompany anatomical images in medical textbooks:
-## 📖 Anatomical Structure Overview
-[Explain what anatomical structure is likely shown]
-## 🔍 Key Features to Observe
-- [Feature 1 - what students should look for in the diagram]
-- [Feature 2]
-- [Feature 3]
-- [Feature 4]
 ## 🏥 Clinical Relevance
-- [Clinical point 1]
-- [Clinical point 2]
 ## 💡 Study Tips
-[How to effectively study this diagram/structure]
 ## ❓ Self-Test Questions
-1. [Question about identification]
-2. [Question about function/relationship]
-3. [Question about clinical application]
-Be thorough and educational, as if explaining a textbook figure."""
         payload = {
             "model": HYPERBOLIC_MODEL,
             "messages": [{"role": "user", "content": prompt}],
             "max_tokens": 800,
-            "temperature": 0.7
         }
         response = requests.post(HYPERBOLIC_API_URL, headers=headers, json=payload, timeout=25)
@@ -471,13 +478,13 @@ Be thorough and educational, as if explaining a textbook figure."""
         result = response.json()
         explanation = result["choices"][0]["message"]["content"]
-        formatted_output = f"""# 📚 Textbook Page Analysis: {page_info}
 {explanation}
 ---
-💪 **Next Steps:** After studying this page, you can test your knowledge in VIVA mode!"""
         return formatted_output
@@ -624,9 +631,10 @@ with gr.Blocks(title="AnatomyBot - MBBS Anatomy Tutor") as demo:
             pdf_upload = gr.File(label="Upload Anatomy Textbook (PDF)", file_types=[".pdf"], type="binary")
             upload_status = gr.Markdown()
-            # State to hold extracted images and captions
             book_images_state = gr.State([])
             page_captions_state = gr.State([])
             # Dropdown to select a page after processing
             page_dropdown = gr.Dropdown(label="Select Page", choices=[], interactive=False)
@@ -639,25 +647,28 @@ with gr.Blocks(title="AnatomyBot - MBBS Anatomy Tutor") as demo:
             # Process upload
             def handle_book_upload(pdf_bytes):
-                images, status_msg = process_uploaded_book(pdf_bytes)
-                if not images:
-                    # No images extracted
-                    return [], status_msg, [], gr.update(choices=[], interactive=False), None, ""
-                # Separate images and captions
-                img_list = [img for img, cap in images]
-                caps = [cap for img, cap in images]
                 # Update dropdown with captions and enable it
                 dropdown_update = gr.update(choices=caps, interactive=True)
-                return img_list, status_msg, caps, dropdown_update, None, ""
             pdf_upload.upload(
                 fn=handle_book_upload,
                 inputs=[pdf_upload],
-                outputs=[book_images_state, upload_status, page_captions_state, page_dropdown, selected_page_image, analysis_output]
             )
             # When a page is selected, show image and analysis
-            def show_page_analysis(selected_caption, images, captions):
                 if not selected_caption:
                     return None, ""
                 # Find index
@@ -665,11 +676,14 @@ with gr.Blocks(title="AnatomyBot - MBBS Anatomy Tutor") as demo:
                     idx = captions.index(selected_caption)
                 except ValueError:
                     return None, ""
                 img = images[idx]
-                analysis = analyze_book_image(img, selected_caption)
                 return img, analysis
-            page_dropdown.change(fn=show_page_analysis, inputs=[page_dropdown, book_images_state, page_captions_state], outputs=[selected_page_image, analysis_output])
         # VIVA MODE TAB
         with gr.Tab("🎯 VIVA Training Mode") as viva_tab:

 # Book Learning Mode Functions
 def process_uploaded_book(pdf_file):
     """
+    Process uploaded PDF book and extract first 20 pages with images and text.
+    Returns (list_of_tuples, status_message) where tuple is (image, caption, text)
     """
     if pdf_file is None:
         return [], "Please upload a PDF file."
     try:
+        extracted_data = []
         # Save uploaded file temporarily
         with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
         try:
             # Convert first 20 pages to images
+            images = convert_from_path(tmp_path, first_page=1, last_page=20, dpi=150)
+            # Extract text from pages
+            reader = PyPDF2.PdfReader(tmp_path)
+            for i, image in enumerate(images):
+                # Get text for this page if available
+                text_content = ""
+                if i < len(reader.pages):
+                    try:
+                        text_content = reader.pages[i].extract_text()
+                    except:
+                        text_content = "Could not extract text from this page."
+                # Limit text length to avoid token limits
+                if len(text_content) > 2000:
+                    text_content = text_content[:2000] + "..."
+                extracted_data.append((image, f"Page {i+1}", text_content))
+            status = f"✅ Successfully processed {len(extracted_data)} pages from your anatomy textbook!"
+            return extracted_data, status
         finally:
             # Clean up temp file
         return [], f"⚠️ Error processing PDF: {str(e)}"
+def analyze_book_image(image, page_info, page_text=""):
     """
     Analyze selected image from book using AI to extract anatomical information.
     Returns formatted explanation text.
         return "Please select an image from the book."
     try:
         headers = {
             "Content-Type": "application/json",
             "Authorization": f"Bearer {HYPERBOLIC_API_KEY}"
         }
+        # Include extracted text in the prompt context
+        context_text = f"Page Content:\n{page_text}" if page_text else "No text extracted from this page."
         prompt = f"""You are an anatomy professor helping MBBS students learn from their textbook.
+A student is looking at {page_info} of their anatomy textbook.
+{context_text}
+Based on the text content above (and typical anatomical diagrams found in such contexts), provide a comprehensive explanation:
+## 📖 Page Overview
+[Summarize the key anatomical topic discussed on this page based on the text]
+## 🔍 Key Concepts Explained
+[Explain the main concepts covered in the text in simple terms]
 ## 🏥 Clinical Relevance
+[Extract or infer clinical points mentioned or relevant to this topic]
 ## 💡 Study Tips
+[How to remember this specific information]
 ## ❓ Self-Test Questions
+1. [Question based on the page text]
+2. [Question based on the page text]
+3. [Question based on the page text]
+Be educational and specific to the provided text content."""
         payload = {
             "model": HYPERBOLIC_MODEL,
             "messages": [{"role": "user", "content": prompt}],
             "max_tokens": 800,
+            "temperature": 0.5
         }
         response = requests.post(HYPERBOLIC_API_URL, headers=headers, json=payload, timeout=25)
         result = response.json()
         explanation = result["choices"][0]["message"]["content"]
+        formatted_output = f"""# 📚 Textbook Analysis: {page_info}
 {explanation}
 ---
+💪 **Next Steps:** Mastered this page? Try the VIVA mode to test yourself!"""
         return formatted_output
             pdf_upload = gr.File(label="Upload Anatomy Textbook (PDF)", file_types=[".pdf"], type="binary")
             upload_status = gr.Markdown()
+            # State to hold extracted images, captions, and text
             book_images_state = gr.State([])
             page_captions_state = gr.State([])
+            page_texts_state = gr.State([])
             # Dropdown to select a page after processing
             page_dropdown = gr.Dropdown(label="Select Page", choices=[], interactive=False)
             # Process upload
             def handle_book_upload(pdf_bytes):
+                extracted_data, status_msg = process_uploaded_book(pdf_bytes)
+                if not extracted_data:
+                    # No data extracted
+                    return [], status_msg, [], [], gr.update(choices=[], interactive=False), None, ""
+                # Separate images, captions, and text
+                img_list = [item[0] for item in extracted_data]
+                caps = [item[1] for item in extracted_data]
+                texts = [item[2] for item in extracted_data]
                 # Update dropdown with captions and enable it
                 dropdown_update = gr.update(choices=caps, interactive=True)
+                return img_list, status_msg, caps, texts, dropdown_update, None, ""
             pdf_upload.upload(
                 fn=handle_book_upload,
                 inputs=[pdf_upload],
+                outputs=[book_images_state, upload_status, page_captions_state, page_texts_state, page_dropdown, selected_page_image, analysis_output]
             )
             # When a page is selected, show image and analysis
+            def show_page_analysis(selected_caption, images, captions, texts):
                 if not selected_caption:
                     return None, ""
                 # Find index
                     idx = captions.index(selected_caption)
                 except ValueError:
                     return None, ""
                 img = images[idx]
+                text = texts[idx] if idx < len(texts) else ""
+                analysis = analyze_book_image(img, selected_caption, text)
                 return img, analysis
+            page_dropdown.change(fn=show_page_analysis, inputs=[page_dropdown, book_images_state, page_captions_state, page_texts_state], outputs=[selected_page_image, analysis_output])
         # VIVA MODE TAB
         with gr.Tab("🎯 VIVA Training Mode") as viva_tab: