TRIAL

Running

App Files Community

atz21 committed on Dec 17, 2025

Commit

6295c4b

verified ·

1 Parent(s): 306e0ab

Update app.py

Browse files

Files changed (1) hide show

app.py +118 -66

app.py CHANGED Viewed

@@ -281,12 +281,26 @@ def save_as_pdf(text, filename="output.pdf"):
     Raises:
         Exception: If Pandoc or pdflatex is not available, or conversion fails
     """
     base_name = os.path.splitext(filename)[0]
     temp_md_file = f"{base_name}_input.md"
     temp_tex_file = f"{base_name}_temp.tex"
     print("\n" + "="*60)
-    print("� MARKDOWoN TO PDF CONVERSION PROCESS")
     print("="*60)
     try:
@@ -581,7 +595,7 @@ def merge_pdfs(paths, output_path):
         writer.write(f)
     return output_path
-def gemini_generate_content(prompt_text, file_upload_obj=None, image_obj=None, model_name="gemini-2.5-pro", fallback_model="gemini-2.5-flash", file_path=None):
     """
     Send prompt_text and optionally an uploaded file (or an image object/list) to the model using NEW SDK.
     Automatically rotates through available API keys on RESOURCE_EXHAUSTED errors.
@@ -592,7 +606,8 @@ def gemini_generate_content(prompt_text, file_upload_obj=None, image_obj=None, m
         file_upload_obj: Previously uploaded file object (optional)
         image_obj: Image or list of images (optional)
         model_name: Primary model to use
-        fallback_model: Fallback model if primary fails
         file_path: Local file path (needed for re-upload when rotating keys)
     Returns textual response and prints progress.
@@ -669,7 +684,7 @@ def gemini_generate_content(prompt_text, file_upload_obj=None, image_obj=None, m
             if "429" in error_str or "RESOURCE_EXHAUSTED" in error_str:
                 print(f"⚠️ Quota exhausted for API key #{current_key_num} with model {model_name}")
-                # Try fallback model with SAME API key before switching keys
                 print(f"⚡ Trying fallback model {fallback_model} with same API key #{current_key_num}")
                 try:
                     response = current_client.models.generate_content(
@@ -684,44 +699,66 @@ def gemini_generate_content(prompt_text, file_upload_obj=None, image_obj=None, m
                     error_fallback_str = str(e_fallback)
                     print(f"❌ Fallback model {fallback_model} also failed: {e_fallback}")
-                    # Check if fallback also exhausted
                     if "429" in error_fallback_str or "RESOURCE_EXHAUSTED" in error_fallback_str:
-                        print(f"⚠️ Fallback model also exhausted for API key #{current_key_num}")
-                        # Now try next API key if available
-                        if attempt < max_attempts - 1:
-                            # Check if we have file uploads and can re-upload
-                            if file_upload_obj and file_path:
-                                print(f"🔄 Rotating to next API key and re-uploading file...")
-                                client_manager.rotate_to_next_key()
-                                # Re-upload file with new API key
-                                try:
-                                    print(f"📤 Re-uploading file with API key #{client_manager.current_key_index + 1}...")
-                                    current_file_obj = upload_to_gemini(file_path)
-                                    print(f"✅ File re-uploaded successfully")
-                                except Exception as upload_error:
-                                    print(f"❌ Failed to re-upload file: {upload_error}")
-                                    raise Exception(f"Failed to re-upload file with new API key: {upload_error}")
-                                attempt += 1
-                                print(f"🔄 Retrying with next API key (attempt {attempt + 1}/{max_attempts})...")
-                                continue
-                            elif file_upload_obj and not file_path:
-                                print("⚠️ WARNING: Cannot rotate API keys - file_path not provided for re-upload!")
-                                print("   To enable API key rotation with file uploads, pass file_path parameter.")
-                                raise Exception(f"All models exhausted for API key #{current_key_num}. Cannot rotate without file_path.")
                             else:
-                                # No file uploads, safe to rotate
-                                client_manager.rotate_to_next_key()
-                                attempt += 1
-                                print(f"🔄 Trying next API key (attempt {attempt + 1}/{max_attempts})...")
-                                continue
-                        else:
-                            raise Exception(f"All {max_attempts} API key(s) exhausted with both models.")
                     else:
-                        # Fallback failed with different error
-                        raise Exception(f"Fallback model failed: {e_fallback}")
             elif "403" in error_str or "PERMISSION_DENIED" in error_str:
                 # This happens when trying to access a file uploaded with a different API key
@@ -746,7 +783,7 @@ def gemini_generate_content(prompt_text, file_upload_obj=None, image_obj=None, m
                     raise Exception(f"File access denied. Cannot re-upload without file_path. Error: {e}")
             else:
-                # Other error - try fallback model with same key
                 print(f"⚡ Trying fallback model {fallback_model} with same API key #{current_key_num}")
                 try:
                     response = current_client.models.generate_content(
@@ -758,34 +795,48 @@ def gemini_generate_content(prompt_text, file_upload_obj=None, image_obj=None, m
                     client_manager.reset_to_primary()
                     return raw_text
                 except Exception as e2:
-                    print(f"❌ Fallback also failed: {e2}")
-                    # If we have more keys, try them
-                    if attempt < max_attempts - 1:
-                        if file_upload_obj and file_path:
-                            print(f"🔄 Rotating to next API key and re-uploading file...")
-                            client_manager.rotate_to_next_key()
-                            try:
-                                print(f"📤 Re-uploading file with API key #{client_manager.current_key_index + 1}...")
-                                current_file_obj = upload_to_gemini(file_path)
-                                print(f"✅ File re-uploaded successfully")
-                            except Exception as upload_error:
-                                print(f"❌ Failed to re-upload file: {upload_error}")
-                                raise Exception(f"Failed to re-upload file with new API key: {upload_error}")
-                            attempt += 1
-                            print(f"🔄 Retrying with next API key (attempt {attempt + 1}/{max_attempts})...")
-                            continue
-                        elif file_upload_obj and not file_path:
-                            raise Exception(f"All models failed. Cannot rotate keys without file_path. Last error: {e2}")
                         else:
-                            client_manager.rotate_to_next_key()
-                            attempt += 1
-                            print(f"🔄 Trying next API key (attempt {attempt + 1}/{max_attempts})...")
-                            continue
-                    else:
-                        raise Exception(f"All attempts failed. Last error: {e2}")
     # If we exhausted all attempts
     raise Exception(f"❌ All {max_attempts} API key(s) exhausted. Please check your quota or try again later.")
@@ -793,6 +844,7 @@ def gemini_generate_content(prompt_text, file_upload_obj=None, image_obj=None, m
 # ---------------- PARSERS ----------------
 def extract_question_ids_from_qpms(text: str):
     """Extract question IDs from QP+MS transcript."""
@@ -1348,7 +1400,7 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, subject="Maths", imprin
         print("1.i) Transcribing QP+MS (questions first, then full markscheme, with graph detection)...")
         qpms_prompt = QP_MS_TRANSCRIPTION_PROMPT["content"] + "\nAt the end, also list all questions in the markscheme where a graph is expected, in the format:\nGraph expected in:\n- Question <number> → Page <number>\n(One per line, after ==== MARKSCHEME END ====)"
-        qpms_text = gemini_generate_content(qpms_prompt, file_upload_obj=merged_uploaded, model_name="gemini-2.5-flash", fallback_model="gemini-2.5-flash-preview-09-2025", file_path=merged_qpms_path)
         print("📄 QP+MS transcription received. Saving debug file: debug_qpms_transcript.txt")
         with open("debug_qpms_transcript.txt", "w", encoding="utf-8") as f:
             f.write(qpms_text)
@@ -1366,7 +1418,7 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, subject="Maths", imprin
         print("1.ii) Building AS transcription prompt with expected question IDs and graph detection, sending to Gemini...")
         as_prompt = build_as_cot_prompt_with_expected_ids(extracted_ids, qpms_text) + "\nAt the end, also list all answers where a graph is found, in the format:\nGraph found in:\n- Answer <number> → Page <number>\n(One per line, after all answers)"
-        as_text = gemini_generate_content(as_prompt, file_upload_obj=ans_uploaded, model_name="gemini-2.5-flash", fallback_model="gemini-2.5-flash-preview-09-2025", file_path=ans_path)
         print("📝 AS transcription received. Saving debug file: debug_as_transcript.txt")
         with open("debug_as_transcript.txt", "w", encoding="utf-8") as f:
             f.write(as_text)

     Raises:
         Exception: If Pandoc or pdflatex is not available, or conversion fails
     """
+    # Sanitize filename - replace spaces and special characters with underscores
+    # This prevents issues with pdflatex and file operations
+    import string
+    valid_chars = f"-_.() {string.ascii_letters}{string.digits}"
+    sanitized_filename = ''.join(c if c in valid_chars else '_' for c in filename)
+    # Replace multiple spaces with single underscore
+    sanitized_filename = re.sub(r'\s+', '_', sanitized_filename)
+    # Remove double underscores
+    sanitized_filename = re.sub(r'_+', '_', sanitized_filename)
+    if sanitized_filename != filename:
+        print(f"ℹ️ Sanitized filename: '{filename}' → '{sanitized_filename}'")
+        filename = sanitized_filename
     base_name = os.path.splitext(filename)[0]
     temp_md_file = f"{base_name}_input.md"
     temp_tex_file = f"{base_name}_temp.tex"
     print("\n" + "="*60)
+    print("📄 MARKDOWN TO PDF CONVERSION PROCESS")
     print("="*60)
     try:
         writer.write(f)
     return output_path
+def gemini_generate_content(prompt_text, file_upload_obj=None, image_obj=None, model_name="gemini-2.5-pro", fallback_model="gemini-2.5-flash", fallback_model_2="gemini-2.5-flash-lite", file_path=None):
     """
     Send prompt_text and optionally an uploaded file (or an image object/list) to the model using NEW SDK.
     Automatically rotates through available API keys on RESOURCE_EXHAUSTED errors.
         file_upload_obj: Previously uploaded file object (optional)
         image_obj: Image or list of images (optional)
         model_name: Primary model to use
+        fallback_model: First fallback model if primary fails
+        fallback_model_2: Second fallback model if first fallback fails
         file_path: Local file path (needed for re-upload when rotating keys)
     Returns textual response and prints progress.
             if "429" in error_str or "RESOURCE_EXHAUSTED" in error_str:
                 print(f"⚠️ Quota exhausted for API key #{current_key_num} with model {model_name}")
+                # Try first fallback model with SAME API key
                 print(f"⚡ Trying fallback model {fallback_model} with same API key #{current_key_num}")
                 try:
                     response = current_client.models.generate_content(
                     error_fallback_str = str(e_fallback)
                     print(f"❌ Fallback model {fallback_model} also failed: {e_fallback}")
+                    # Check if first fallback also exhausted
                     if "429" in error_fallback_str or "RESOURCE_EXHAUSTED" in error_fallback_str:
+                        print(f"⚠️ First fallback model also exhausted for API key #{current_key_num}")
+                        # Try second fallback model with SAME API key
+                        print(f"⚡ Trying second fallback model {fallback_model_2} with same API key #{current_key_num}")
+                        try:
+                            response = current_client.models.generate_content(
+                                model=fallback_model_2,
+                                contents=contents
+                            )
+                            raw_text = response.text
+                            print(f"📥 Received response (chars): {len(raw_text)}")
+                            client_manager.reset_to_primary()
+                            return raw_text
+                        except Exception as e_fallback_2:
+                            error_fallback_2_str = str(e_fallback_2)
+                            print(f"❌ Second fallback model {fallback_model_2} also failed: {e_fallback_2}")
+                            # Check if second fallback also exhausted
+                            if "429" in error_fallback_2_str or "RESOURCE_EXHAUSTED" in error_fallback_2_str:
+                                print(f"⚠️ All 3 models exhausted for API key #{current_key_num}")
+                                # Now try next API key if available
+                                if attempt < max_attempts - 1:
+                                    # Check if we have file uploads and can re-upload
+                                    if file_upload_obj and file_path:
+                                        print(f"🔄 Rotating to next API key and re-uploading file...")
+                                        client_manager.rotate_to_next_key()
+                                        # Re-upload file with new API key
+                                        try:
+                                            print(f"📤 Re-uploading file with API key #{client_manager.current_key_index + 1}...")
+                                            current_file_obj = upload_to_gemini(file_path)
+                                            print(f"✅ File re-uploaded successfully")
+                                        except Exception as upload_error:
+                                            print(f"❌ Failed to re-upload file: {upload_error}")
+                                            raise Exception(f"Failed to re-upload file with new API key: {upload_error}")
+                                        attempt += 1
+                                        print(f"🔄 Retrying with next API key (attempt {attempt + 1}/{max_attempts})...")
+                                        continue
+                                    elif file_upload_obj and not file_path:
+                                        print("⚠️ WARNING: Cannot rotate API keys - file_path not provided for re-upload!")
+                                        print("   To enable API key rotation with file uploads, pass file_path parameter.")
+                                        raise Exception(f"All 3 models exhausted for API key #{current_key_num}. Cannot rotate without file_path.")
+                                    else:
+                                        # No file uploads, safe to rotate
+                                        client_manager.rotate_to_next_key()
+                                        attempt += 1
+                                        print(f"🔄 Trying next API key (attempt {attempt + 1}/{max_attempts})...")
+                                        continue
+                                else:
+                                    raise Exception(f"All {max_attempts} API key(s) exhausted with all 3 models.")
                             else:
+                                # Second fallback failed with different error
+                                raise Exception(f"Second fallback model failed: {e_fallback_2}")
                     else:
+                        # First fallback failed with different error
+                        raise Exception(f"First fallback model failed: {e_fallback}")
             elif "403" in error_str or "PERMISSION_DENIED" in error_str:
                 # This happens when trying to access a file uploaded with a different API key
                     raise Exception(f"File access denied. Cannot re-upload without file_path. Error: {e}")
             else:
+                # Other error - try fallback models with same key
                 print(f"⚡ Trying fallback model {fallback_model} with same API key #{current_key_num}")
                 try:
                     response = current_client.models.generate_content(
                     client_manager.reset_to_primary()
                     return raw_text
                 except Exception as e2:
+                    print(f"❌ First fallback also failed: {e2}")
+                    # Try second fallback
+                    print(f"⚡ Trying second fallback model {fallback_model_2} with same API key #{current_key_num}")
+                    try:
+                        response = current_client.models.generate_content(
+                            model=fallback_model_2,
+                            contents=contents
+                        )
+                        raw_text = response.text
+                        print(f"📥 Received response (chars): {len(raw_text)}")
+                        client_manager.reset_to_primary()
+                        return raw_text
+                    except Exception as e3:
+                        print(f"❌ Second fallback also failed: {e3}")
+                        # If we have more keys, try them
+                        if attempt < max_attempts - 1:
+                            if file_upload_obj and file_path:
+                                print(f"🔄 Rotating to next API key and re-uploading file...")
+                                client_manager.rotate_to_next_key()
+                                try:
+                                    print(f"📤 Re-uploading file with API key #{client_manager.current_key_index + 1}...")
+                                    current_file_obj = upload_to_gemini(file_path)
+                                    print(f"✅ File re-uploaded successfully")
+                                except Exception as upload_error:
+                                    print(f"❌ Failed to re-upload file: {upload_error}")
+                                    raise Exception(f"Failed to re-upload file with new API key: {upload_error}")
+                                attempt += 1
+                                print(f"🔄 Retrying with next API key (attempt {attempt + 1}/{max_attempts})...")
+                                continue
+                            elif file_upload_obj and not file_path:
+                                raise Exception(f"All models failed. Cannot rotate keys without file_path. Last error: {e3}")
+                            else:
+                                client_manager.rotate_to_next_key()
+                                attempt += 1
+                                print(f"🔄 Trying next API key (attempt {attempt + 1}/{max_attempts})...")
+                                continue
                         else:
+                            raise Exception(f"All attempts failed. Last error: {e3}")
     # If we exhausted all attempts
     raise Exception(f"❌ All {max_attempts} API key(s) exhausted. Please check your quota or try again later.")
 # ---------------- PARSERS ----------------
 def extract_question_ids_from_qpms(text: str):
     """Extract question IDs from QP+MS transcript."""
         print("1.i) Transcribing QP+MS (questions first, then full markscheme, with graph detection)...")
         qpms_prompt = QP_MS_TRANSCRIPTION_PROMPT["content"] + "\nAt the end, also list all questions in the markscheme where a graph is expected, in the format:\nGraph expected in:\n- Question <number> → Page <number>\n(One per line, after ==== MARKSCHEME END ====)"
+        qpms_text = gemini_generate_content(qpms_prompt, file_upload_obj=merged_uploaded, model_name="gemini-2.5-flash", fallback_model="gemini-2.5-flash-preview-09-2025", fallback_model_2="gemini-2.5-flash-lite", file_path=merged_qpms_path)
         print("📄 QP+MS transcription received. Saving debug file: debug_qpms_transcript.txt")
         with open("debug_qpms_transcript.txt", "w", encoding="utf-8") as f:
             f.write(qpms_text)
         print("1.ii) Building AS transcription prompt with expected question IDs and graph detection, sending to Gemini...")
         as_prompt = build_as_cot_prompt_with_expected_ids(extracted_ids, qpms_text) + "\nAt the end, also list all answers where a graph is found, in the format:\nGraph found in:\n- Answer <number> → Page <number>\n(One per line, after all answers)"
+        as_text = gemini_generate_content(as_prompt, file_upload_obj=ans_uploaded, model_name="gemini-2.5-flash", fallback_model="gemini-2.5-flash-preview-09-2025", fallback_model_2="gemini-2.5-flash-lite", file_path=ans_path)
         print("📝 AS transcription received. Saving debug file: debug_as_transcript.txt")
         with open("debug_as_transcript.txt", "w", encoding="utf-8") as f:
             f.write(as_text)