atz21 committed on
Commit
6295c4b
·
verified ·
1 Parent(s): 306e0ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +118 -66
app.py CHANGED
@@ -281,12 +281,26 @@ def save_as_pdf(text, filename="output.pdf"):
281
  Raises:
282
  Exception: If Pandoc or pdflatex is not available, or conversion fails
283
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
284
  base_name = os.path.splitext(filename)[0]
285
  temp_md_file = f"{base_name}_input.md"
286
  temp_tex_file = f"{base_name}_temp.tex"
287
 
288
  print("\n" + "="*60)
289
- print("οΏ½ MARKDOWoN TO PDF CONVERSION PROCESS")
290
  print("="*60)
291
 
292
  try:
@@ -581,7 +595,7 @@ def merge_pdfs(paths, output_path):
581
  writer.write(f)
582
  return output_path
583
 
584
- def gemini_generate_content(prompt_text, file_upload_obj=None, image_obj=None, model_name="gemini-2.5-pro", fallback_model="gemini-2.5-flash", file_path=None):
585
  """
586
  Send prompt_text and optionally an uploaded file (or an image object/list) to the model using NEW SDK.
587
  Automatically rotates through available API keys on RESOURCE_EXHAUSTED errors.
@@ -592,7 +606,8 @@ def gemini_generate_content(prompt_text, file_upload_obj=None, image_obj=None, m
592
  file_upload_obj: Previously uploaded file object (optional)
593
  image_obj: Image or list of images (optional)
594
  model_name: Primary model to use
595
- fallback_model: Fallback model if primary fails
 
596
  file_path: Local file path (needed for re-upload when rotating keys)
597
 
598
  Returns textual response and prints progress.
@@ -669,7 +684,7 @@ def gemini_generate_content(prompt_text, file_upload_obj=None, image_obj=None, m
669
  if "429" in error_str or "RESOURCE_EXHAUSTED" in error_str:
670
  print(f"⚠️ Quota exhausted for API key #{current_key_num} with model {model_name}")
671
 
672
- # Try fallback model with SAME API key before switching keys
673
  print(f"⚑ Trying fallback model {fallback_model} with same API key #{current_key_num}")
674
  try:
675
  response = current_client.models.generate_content(
@@ -684,44 +699,66 @@ def gemini_generate_content(prompt_text, file_upload_obj=None, image_obj=None, m
684
  error_fallback_str = str(e_fallback)
685
  print(f"❌ Fallback model {fallback_model} also failed: {e_fallback}")
686
 
687
- # Check if fallback also exhausted
688
  if "429" in error_fallback_str or "RESOURCE_EXHAUSTED" in error_fallback_str:
689
- print(f"⚠️ Fallback model also exhausted for API key #{current_key_num}")
690
 
691
- # Now try next API key if available
692
- if attempt < max_attempts - 1:
693
- # Check if we have file uploads and can re-upload
694
- if file_upload_obj and file_path:
695
- print(f"🔄 Rotating to next API key and re-uploading file...")
696
- client_manager.rotate_to_next_key()
697
-
698
- # Re-upload file with new API key
699
- try:
700
- print(f"📤 Re-uploading file with API key #{client_manager.current_key_index + 1}...")
701
- current_file_obj = upload_to_gemini(file_path)
702
- print(f"✅ File re-uploaded successfully")
703
- except Exception as upload_error:
704
- print(f"❌ Failed to re-upload file: {upload_error}")
705
- raise Exception(f"Failed to re-upload file with new API key: {upload_error}")
 
 
 
706
 
707
- attempt += 1
708
- print(f"🔄 Retrying with next API key (attempt {attempt + 1}/{max_attempts})...")
709
- continue
710
- elif file_upload_obj and not file_path:
711
- print("⚠️ WARNING: Cannot rotate API keys - file_path not provided for re-upload!")
712
- print(" To enable API key rotation with file uploads, pass file_path parameter.")
713
- raise Exception(f"All models exhausted for API key #{current_key_num}. Cannot rotate without file_path.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
714
  else:
715
- # No file uploads, safe to rotate
716
- client_manager.rotate_to_next_key()
717
- attempt += 1
718
- print(f"🔄 Trying next API key (attempt {attempt + 1}/{max_attempts})...")
719
- continue
720
- else:
721
- raise Exception(f"All {max_attempts} API key(s) exhausted with both models.")
722
  else:
723
- # Fallback failed with different error
724
- raise Exception(f"Fallback model failed: {e_fallback}")
725
 
726
  elif "403" in error_str or "PERMISSION_DENIED" in error_str:
727
  # This happens when trying to access a file uploaded with a different API key
@@ -746,7 +783,7 @@ def gemini_generate_content(prompt_text, file_upload_obj=None, image_obj=None, m
746
  raise Exception(f"File access denied. Cannot re-upload without file_path. Error: {e}")
747
 
748
  else:
749
- # Other error - try fallback model with same key
750
  print(f"⚑ Trying fallback model {fallback_model} with same API key #{current_key_num}")
751
  try:
752
  response = current_client.models.generate_content(
@@ -758,34 +795,48 @@ def gemini_generate_content(prompt_text, file_upload_obj=None, image_obj=None, m
758
  client_manager.reset_to_primary()
759
  return raw_text
760
  except Exception as e2:
761
- print(f"❌ Fallback also failed: {e2}")
762
 
763
- # If we have more keys, try them
764
- if attempt < max_attempts - 1:
765
- if file_upload_obj and file_path:
766
- print(f"🔄 Rotating to next API key and re-uploading file...")
767
- client_manager.rotate_to_next_key()
768
-
769
- try:
770
- print(f"📤 Re-uploading file with API key #{client_manager.current_key_index + 1}...")
771
- current_file_obj = upload_to_gemini(file_path)
772
- print(f"✅ File re-uploaded successfully")
773
- except Exception as upload_error:
774
- print(f"❌ Failed to re-upload file: {upload_error}")
775
- raise Exception(f"Failed to re-upload file with new API key: {upload_error}")
776
-
777
- attempt += 1
778
- print(f"🔄 Retrying with next API key (attempt {attempt + 1}/{max_attempts})...")
779
- continue
780
- elif file_upload_obj and not file_path:
781
- raise Exception(f"All models failed. Cannot rotate keys without file_path. Last error: {e2}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
782
  else:
783
- client_manager.rotate_to_next_key()
784
- attempt += 1
785
- print(f"🔄 Trying next API key (attempt {attempt + 1}/{max_attempts})...")
786
- continue
787
- else:
788
- raise Exception(f"All attempts failed. Last error: {e2}")
789
 
790
  # If we exhausted all attempts
791
  raise Exception(f"❌ All {max_attempts} API key(s) exhausted. Please check your quota or try again later.")
@@ -793,6 +844,7 @@ def gemini_generate_content(prompt_text, file_upload_obj=None, image_obj=None, m
793
 
794
 
795
 
 
796
  # ---------------- PARSERS ----------------
797
  def extract_question_ids_from_qpms(text: str):
798
  """Extract question IDs from QP+MS transcript."""
@@ -1348,7 +1400,7 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, subject="Maths", imprin
1348
 
1349
  print("1.i) Transcribing QP+MS (questions first, then full markscheme, with graph detection)...")
1350
  qpms_prompt = QP_MS_TRANSCRIPTION_PROMPT["content"] + "\nAt the end, also list all questions in the markscheme where a graph is expected, in the format:\nGraph expected in:\n- Question <number> → Page <number>\n(One per line, after ==== MARKSCHEME END ====)"
1351
- qpms_text = gemini_generate_content(qpms_prompt, file_upload_obj=merged_uploaded, model_name="gemini-2.5-flash", fallback_model="gemini-2.5-flash-preview-09-2025", file_path=merged_qpms_path)
1352
  print("📄 QP+MS transcription received. Saving debug file: debug_qpms_transcript.txt")
1353
  with open("debug_qpms_transcript.txt", "w", encoding="utf-8") as f:
1354
  f.write(qpms_text)
@@ -1366,7 +1418,7 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, subject="Maths", imprin
1366
 
1367
  print("1.ii) Building AS transcription prompt with expected question IDs and graph detection, sending to Gemini...")
1368
  as_prompt = build_as_cot_prompt_with_expected_ids(extracted_ids, qpms_text) + "\nAt the end, also list all answers where a graph is found, in the format:\nGraph found in:\n- Answer <number> → Page <number>\n(One per line, after all answers)"
1369
- as_text = gemini_generate_content(as_prompt, file_upload_obj=ans_uploaded, model_name="gemini-2.5-flash", fallback_model="gemini-2.5-flash-preview-09-2025", file_path=ans_path)
1370
  print("📝 AS transcription received. Saving debug file: debug_as_transcript.txt")
1371
  with open("debug_as_transcript.txt", "w", encoding="utf-8") as f:
1372
  f.write(as_text)
 
281
  Raises:
282
  Exception: If Pandoc or pdflatex is not available, or conversion fails
283
  """
284
+ # Sanitize filename - replace spaces and special characters with underscores
285
+ # This prevents issues with pdflatex and file operations
286
+ import string
287
+ valid_chars = f"-_.() {string.ascii_letters}{string.digits}"
288
+ sanitized_filename = ''.join(c if c in valid_chars else '_' for c in filename)
289
+ # Replace multiple spaces with single underscore
290
+ sanitized_filename = re.sub(r'\s+', '_', sanitized_filename)
291
+ # Remove double underscores
292
+ sanitized_filename = re.sub(r'_+', '_', sanitized_filename)
293
+
294
+ if sanitized_filename != filename:
295
+ print(f"ℹ️ Sanitized filename: '{filename}' → '{sanitized_filename}'")
296
+ filename = sanitized_filename
297
+
298
  base_name = os.path.splitext(filename)[0]
299
  temp_md_file = f"{base_name}_input.md"
300
  temp_tex_file = f"{base_name}_temp.tex"
301
 
302
  print("\n" + "="*60)
303
+ print("📄 MARKDOWN TO PDF CONVERSION PROCESS")
304
  print("="*60)
305
 
306
  try:
 
595
  writer.write(f)
596
  return output_path
597
 
598
+ def gemini_generate_content(prompt_text, file_upload_obj=None, image_obj=None, model_name="gemini-2.5-pro", fallback_model="gemini-2.5-flash", fallback_model_2="gemini-2.5-flash-lite", file_path=None):
599
  """
600
  Send prompt_text and optionally an uploaded file (or an image object/list) to the model using NEW SDK.
601
  Automatically rotates through available API keys on RESOURCE_EXHAUSTED errors.
 
606
  file_upload_obj: Previously uploaded file object (optional)
607
  image_obj: Image or list of images (optional)
608
  model_name: Primary model to use
609
+ fallback_model: First fallback model if primary fails
610
+ fallback_model_2: Second fallback model if first fallback fails
611
  file_path: Local file path (needed for re-upload when rotating keys)
612
 
613
  Returns textual response and prints progress.
 
684
  if "429" in error_str or "RESOURCE_EXHAUSTED" in error_str:
685
  print(f"⚠️ Quota exhausted for API key #{current_key_num} with model {model_name}")
686
 
687
+ # Try first fallback model with SAME API key
688
  print(f"⚑ Trying fallback model {fallback_model} with same API key #{current_key_num}")
689
  try:
690
  response = current_client.models.generate_content(
 
699
  error_fallback_str = str(e_fallback)
700
  print(f"❌ Fallback model {fallback_model} also failed: {e_fallback}")
701
 
702
+ # Check if first fallback also exhausted
703
  if "429" in error_fallback_str or "RESOURCE_EXHAUSTED" in error_fallback_str:
704
+ print(f"⚠️ First fallback model also exhausted for API key #{current_key_num}")
705
 
706
+ # Try second fallback model with SAME API key
707
+ print(f"⚑ Trying second fallback model {fallback_model_2} with same API key #{current_key_num}")
708
+ try:
709
+ response = current_client.models.generate_content(
710
+ model=fallback_model_2,
711
+ contents=contents
712
+ )
713
+ raw_text = response.text
714
+ print(f"📥 Received response (chars): {len(raw_text)}")
715
+ client_manager.reset_to_primary()
716
+ return raw_text
717
+ except Exception as e_fallback_2:
718
+ error_fallback_2_str = str(e_fallback_2)
719
+ print(f"❌ Second fallback model {fallback_model_2} also failed: {e_fallback_2}")
720
+
721
+ # Check if second fallback also exhausted
722
+ if "429" in error_fallback_2_str or "RESOURCE_EXHAUSTED" in error_fallback_2_str:
723
+ print(f"⚠️ All 3 models exhausted for API key #{current_key_num}")
724
 
725
+ # Now try next API key if available
726
+ if attempt < max_attempts - 1:
727
+ # Check if we have file uploads and can re-upload
728
+ if file_upload_obj and file_path:
729
+ print(f"🔄 Rotating to next API key and re-uploading file...")
730
+ client_manager.rotate_to_next_key()
731
+
732
+ # Re-upload file with new API key
733
+ try:
734
+ print(f"📤 Re-uploading file with API key #{client_manager.current_key_index + 1}...")
735
+ current_file_obj = upload_to_gemini(file_path)
736
+ print(f"✅ File re-uploaded successfully")
737
+ except Exception as upload_error:
738
+ print(f"❌ Failed to re-upload file: {upload_error}")
739
+ raise Exception(f"Failed to re-upload file with new API key: {upload_error}")
740
+
741
+ attempt += 1
742
+ print(f"🔄 Retrying with next API key (attempt {attempt + 1}/{max_attempts})...")
743
+ continue
744
+ elif file_upload_obj and not file_path:
745
+ print("⚠️ WARNING: Cannot rotate API keys - file_path not provided for re-upload!")
746
+ print(" To enable API key rotation with file uploads, pass file_path parameter.")
747
+ raise Exception(f"All 3 models exhausted for API key #{current_key_num}. Cannot rotate without file_path.")
748
+ else:
749
+ # No file uploads, safe to rotate
750
+ client_manager.rotate_to_next_key()
751
+ attempt += 1
752
+ print(f"🔄 Trying next API key (attempt {attempt + 1}/{max_attempts})...")
753
+ continue
754
+ else:
755
+ raise Exception(f"All {max_attempts} API key(s) exhausted with all 3 models.")
756
  else:
757
+ # Second fallback failed with different error
758
+ raise Exception(f"Second fallback model failed: {e_fallback_2}")
 
 
 
 
 
759
  else:
760
+ # First fallback failed with different error
761
+ raise Exception(f"First fallback model failed: {e_fallback}")
762
 
763
  elif "403" in error_str or "PERMISSION_DENIED" in error_str:
764
  # This happens when trying to access a file uploaded with a different API key
 
783
  raise Exception(f"File access denied. Cannot re-upload without file_path. Error: {e}")
784
 
785
  else:
786
+ # Other error - try fallback models with same key
787
  print(f"⚑ Trying fallback model {fallback_model} with same API key #{current_key_num}")
788
  try:
789
  response = current_client.models.generate_content(
 
795
  client_manager.reset_to_primary()
796
  return raw_text
797
  except Exception as e2:
798
+ print(f"❌ First fallback also failed: {e2}")
799
 
800
+ # Try second fallback
801
+ print(f"⚑ Trying second fallback model {fallback_model_2} with same API key #{current_key_num}")
802
+ try:
803
+ response = current_client.models.generate_content(
804
+ model=fallback_model_2,
805
+ contents=contents
806
+ )
807
+ raw_text = response.text
808
+ print(f"📥 Received response (chars): {len(raw_text)}")
809
+ client_manager.reset_to_primary()
810
+ return raw_text
811
+ except Exception as e3:
812
+ print(f"❌ Second fallback also failed: {e3}")
813
+
814
+ # If we have more keys, try them
815
+ if attempt < max_attempts - 1:
816
+ if file_upload_obj and file_path:
817
+ print(f"🔄 Rotating to next API key and re-uploading file...")
818
+ client_manager.rotate_to_next_key()
819
+
820
+ try:
821
+ print(f"📤 Re-uploading file with API key #{client_manager.current_key_index + 1}...")
822
+ current_file_obj = upload_to_gemini(file_path)
823
+ print(f"✅ File re-uploaded successfully")
824
+ except Exception as upload_error:
825
+ print(f"❌ Failed to re-upload file: {upload_error}")
826
+ raise Exception(f"Failed to re-upload file with new API key: {upload_error}")
827
+
828
+ attempt += 1
829
+ print(f"🔄 Retrying with next API key (attempt {attempt + 1}/{max_attempts})...")
830
+ continue
831
+ elif file_upload_obj and not file_path:
832
+ raise Exception(f"All models failed. Cannot rotate keys without file_path. Last error: {e3}")
833
+ else:
834
+ client_manager.rotate_to_next_key()
835
+ attempt += 1
836
+ print(f"🔄 Trying next API key (attempt {attempt + 1}/{max_attempts})...")
837
+ continue
838
  else:
839
+ raise Exception(f"All attempts failed. Last error: {e3}")
 
 
 
 
 
840
 
841
  # If we exhausted all attempts
842
  raise Exception(f"❌ All {max_attempts} API key(s) exhausted. Please check your quota or try again later.")
 
844
 
845
 
846
 
847
+
848
  # ---------------- PARSERS ----------------
849
  def extract_question_ids_from_qpms(text: str):
850
  """Extract question IDs from QP+MS transcript."""
 
1400
 
1401
  print("1.i) Transcribing QP+MS (questions first, then full markscheme, with graph detection)...")
1402
  qpms_prompt = QP_MS_TRANSCRIPTION_PROMPT["content"] + "\nAt the end, also list all questions in the markscheme where a graph is expected, in the format:\nGraph expected in:\n- Question <number> → Page <number>\n(One per line, after ==== MARKSCHEME END ====)"
1403
+ qpms_text = gemini_generate_content(qpms_prompt, file_upload_obj=merged_uploaded, model_name="gemini-2.5-flash", fallback_model="gemini-2.5-flash-preview-09-2025", fallback_model_2="gemini-2.5-flash-lite", file_path=merged_qpms_path)
1404
  print("📄 QP+MS transcription received. Saving debug file: debug_qpms_transcript.txt")
1405
  with open("debug_qpms_transcript.txt", "w", encoding="utf-8") as f:
1406
  f.write(qpms_text)
 
1418
 
1419
  print("1.ii) Building AS transcription prompt with expected question IDs and graph detection, sending to Gemini...")
1420
  as_prompt = build_as_cot_prompt_with_expected_ids(extracted_ids, qpms_text) + "\nAt the end, also list all answers where a graph is found, in the format:\nGraph found in:\n- Answer <number> → Page <number>\n(One per line, after all answers)"
1421
+ as_text = gemini_generate_content(as_prompt, file_upload_obj=ans_uploaded, model_name="gemini-2.5-flash", fallback_model="gemini-2.5-flash-preview-09-2025", fallback_model_2="gemini-2.5-flash-lite", file_path=ans_path)
1422
  print("📝 AS transcription received. Saving debug file: debug_as_transcript.txt")
1423
  with open("debug_as_transcript.txt", "w", encoding="utf-8") as f:
1424
  f.write(as_text)