Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -281,12 +281,26 @@ def save_as_pdf(text, filename="output.pdf"):
|
|
| 281 |
Raises:
|
| 282 |
Exception: If Pandoc or pdflatex is not available, or conversion fails
|
| 283 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 284 |
base_name = os.path.splitext(filename)[0]
|
| 285 |
temp_md_file = f"{base_name}_input.md"
|
| 286 |
temp_tex_file = f"{base_name}_temp.tex"
|
| 287 |
|
| 288 |
print("\n" + "="*60)
|
| 289 |
-
print("
|
| 290 |
print("="*60)
|
| 291 |
|
| 292 |
try:
|
|
@@ -581,7 +595,7 @@ def merge_pdfs(paths, output_path):
|
|
| 581 |
writer.write(f)
|
| 582 |
return output_path
|
| 583 |
|
| 584 |
-
def gemini_generate_content(prompt_text, file_upload_obj=None, image_obj=None, model_name="gemini-2.5-pro", fallback_model="gemini-2.5-flash", file_path=None):
|
| 585 |
"""
|
| 586 |
Send prompt_text and optionally an uploaded file (or an image object/list) to the model using NEW SDK.
|
| 587 |
Automatically rotates through available API keys on RESOURCE_EXHAUSTED errors.
|
|
@@ -592,7 +606,8 @@ def gemini_generate_content(prompt_text, file_upload_obj=None, image_obj=None, m
|
|
| 592 |
file_upload_obj: Previously uploaded file object (optional)
|
| 593 |
image_obj: Image or list of images (optional)
|
| 594 |
model_name: Primary model to use
|
| 595 |
-
fallback_model:
|
|
|
|
| 596 |
file_path: Local file path (needed for re-upload when rotating keys)
|
| 597 |
|
| 598 |
Returns textual response and prints progress.
|
|
@@ -669,7 +684,7 @@ def gemini_generate_content(prompt_text, file_upload_obj=None, image_obj=None, m
|
|
| 669 |
if "429" in error_str or "RESOURCE_EXHAUSTED" in error_str:
|
| 670 |
print(f"⚠️ Quota exhausted for API key #{current_key_num} with model {model_name}")
|
| 671 |
|
| 672 |
-
# Try fallback model with SAME API key
|
| 673 |
print(f"⚡ Trying fallback model {fallback_model} with same API key #{current_key_num}")
|
| 674 |
try:
|
| 675 |
response = current_client.models.generate_content(
|
|
@@ -684,44 +699,66 @@ def gemini_generate_content(prompt_text, file_upload_obj=None, image_obj=None, m
|
|
| 684 |
error_fallback_str = str(e_fallback)
|
| 685 |
print(f"β Fallback model {fallback_model} also failed: {e_fallback}")
|
| 686 |
|
| 687 |
-
# Check if fallback also exhausted
|
| 688 |
if "429" in error_fallback_str or "RESOURCE_EXHAUSTED" in error_fallback_str:
|
| 689 |
-
print(f"β οΈ
|
| 690 |
|
| 691 |
-
#
|
| 692 |
-
|
| 693 |
-
|
| 694 |
-
|
| 695 |
-
|
| 696 |
-
|
| 697 |
-
|
| 698 |
-
|
| 699 |
-
|
| 700 |
-
|
| 701 |
-
|
| 702 |
-
|
| 703 |
-
|
| 704 |
-
|
| 705 |
-
|
|
|
|
|
|
|
|
|
|
| 706 |
|
| 707 |
-
|
| 708 |
-
|
| 709 |
-
|
| 710 |
-
|
| 711 |
-
|
| 712 |
-
|
| 713 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 714 |
else:
|
| 715 |
-
#
|
| 716 |
-
|
| 717 |
-
attempt += 1
|
| 718 |
-
print(f"π Trying next API key (attempt {attempt + 1}/{max_attempts})...")
|
| 719 |
-
continue
|
| 720 |
-
else:
|
| 721 |
-
raise Exception(f"All {max_attempts} API key(s) exhausted with both models.")
|
| 722 |
else:
|
| 723 |
-
#
|
| 724 |
-
raise Exception(f"
|
| 725 |
|
| 726 |
elif "403" in error_str or "PERMISSION_DENIED" in error_str:
|
| 727 |
# This happens when trying to access a file uploaded with a different API key
|
|
@@ -746,7 +783,7 @@ def gemini_generate_content(prompt_text, file_upload_obj=None, image_obj=None, m
|
|
| 746 |
raise Exception(f"File access denied. Cannot re-upload without file_path. Error: {e}")
|
| 747 |
|
| 748 |
else:
|
| 749 |
-
# Other error - try fallback
|
| 750 |
print(f"⚡ Trying fallback model {fallback_model} with same API key #{current_key_num}")
|
| 751 |
try:
|
| 752 |
response = current_client.models.generate_content(
|
|
@@ -758,34 +795,48 @@ def gemini_generate_content(prompt_text, file_upload_obj=None, image_obj=None, m
|
|
| 758 |
client_manager.reset_to_primary()
|
| 759 |
return raw_text
|
| 760 |
except Exception as e2:
|
| 761 |
-
print(f"β
|
| 762 |
|
| 763 |
-
#
|
| 764 |
-
|
| 765 |
-
|
| 766 |
-
|
| 767 |
-
|
| 768 |
-
|
| 769 |
-
|
| 770 |
-
|
| 771 |
-
|
| 772 |
-
|
| 773 |
-
|
| 774 |
-
|
| 775 |
-
|
| 776 |
-
|
| 777 |
-
|
| 778 |
-
|
| 779 |
-
|
| 780 |
-
|
| 781 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 782 |
else:
|
| 783 |
-
|
| 784 |
-
attempt += 1
|
| 785 |
-
print(f"π Trying next API key (attempt {attempt + 1}/{max_attempts})...")
|
| 786 |
-
continue
|
| 787 |
-
else:
|
| 788 |
-
raise Exception(f"All attempts failed. Last error: {e2}")
|
| 789 |
|
| 790 |
# If we exhausted all attempts
|
| 791 |
raise Exception(f"β All {max_attempts} API key(s) exhausted. Please check your quota or try again later.")
|
|
@@ -793,6 +844,7 @@ def gemini_generate_content(prompt_text, file_upload_obj=None, image_obj=None, m
|
|
| 793 |
|
| 794 |
|
| 795 |
|
|
|
|
| 796 |
# ---------------- PARSERS ----------------
|
| 797 |
def extract_question_ids_from_qpms(text: str):
|
| 798 |
"""Extract question IDs from QP+MS transcript."""
|
|
@@ -1348,7 +1400,7 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, subject="Maths", imprin
|
|
| 1348 |
|
| 1349 |
print("1.i) Transcribing QP+MS (questions first, then full markscheme, with graph detection)...")
|
| 1350 |
qpms_prompt = QP_MS_TRANSCRIPTION_PROMPT["content"] + "\nAt the end, also list all questions in the markscheme where a graph is expected, in the format:\nGraph expected in:\n- Question <number> β Page <number>\n(One per line, after ==== MARKSCHEME END ====)"
|
| 1351 |
-
qpms_text = gemini_generate_content(qpms_prompt, file_upload_obj=merged_uploaded, model_name="gemini-2.5-flash", fallback_model="gemini-2.5-flash-preview-09-2025", file_path=merged_qpms_path)
|
| 1352 |
print("π QP+MS transcription received. Saving debug file: debug_qpms_transcript.txt")
|
| 1353 |
with open("debug_qpms_transcript.txt", "w", encoding="utf-8") as f:
|
| 1354 |
f.write(qpms_text)
|
|
@@ -1366,7 +1418,7 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, subject="Maths", imprin
|
|
| 1366 |
|
| 1367 |
print("1.ii) Building AS transcription prompt with expected question IDs and graph detection, sending to Gemini...")
|
| 1368 |
as_prompt = build_as_cot_prompt_with_expected_ids(extracted_ids, qpms_text) + "\nAt the end, also list all answers where a graph is found, in the format:\nGraph found in:\n- Answer <number> β Page <number>\n(One per line, after all answers)"
|
| 1369 |
-
as_text = gemini_generate_content(as_prompt, file_upload_obj=ans_uploaded, model_name="gemini-2.5-flash", fallback_model="gemini-2.5-flash-preview-09-2025", file_path=ans_path)
|
| 1370 |
print("π AS transcription received. Saving debug file: debug_as_transcript.txt")
|
| 1371 |
with open("debug_as_transcript.txt", "w", encoding="utf-8") as f:
|
| 1372 |
f.write(as_text)
|
|
|
|
| 281 |
Raises:
|
| 282 |
Exception: If Pandoc or pdflatex is not available, or conversion fails
|
| 283 |
"""
|
| 284 |
+
# Sanitize filename - replace spaces and special characters with underscores
|
| 285 |
+
# This prevents issues with pdflatex and file operations
|
| 286 |
+
import string
|
| 287 |
+
valid_chars = f"-_.() {string.ascii_letters}{string.digits}"
|
| 288 |
+
sanitized_filename = ''.join(c if c in valid_chars else '_' for c in filename)
|
| 289 |
+
# Replace multiple spaces with single underscore
|
| 290 |
+
sanitized_filename = re.sub(r'\s+', '_', sanitized_filename)
|
| 291 |
+
# Remove double underscores
|
| 292 |
+
sanitized_filename = re.sub(r'_+', '_', sanitized_filename)
|
| 293 |
+
|
| 294 |
+
if sanitized_filename != filename:
|
| 295 |
+
print(f"ℹ️ Sanitized filename: '{filename}' → '{sanitized_filename}'")
|
| 296 |
+
filename = sanitized_filename
|
| 297 |
+
|
| 298 |
base_name = os.path.splitext(filename)[0]
|
| 299 |
temp_md_file = f"{base_name}_input.md"
|
| 300 |
temp_tex_file = f"{base_name}_temp.tex"
|
| 301 |
|
| 302 |
print("\n" + "="*60)
|
| 303 |
+
print("π MARKDOWN TO PDF CONVERSION PROCESS")
|
| 304 |
print("="*60)
|
| 305 |
|
| 306 |
try:
|
|
|
|
| 595 |
writer.write(f)
|
| 596 |
return output_path
|
| 597 |
|
| 598 |
+
def gemini_generate_content(prompt_text, file_upload_obj=None, image_obj=None, model_name="gemini-2.5-pro", fallback_model="gemini-2.5-flash", fallback_model_2="gemini-2.5-flash-lite", file_path=None):
|
| 599 |
"""
|
| 600 |
Send prompt_text and optionally an uploaded file (or an image object/list) to the model using NEW SDK.
|
| 601 |
Automatically rotates through available API keys on RESOURCE_EXHAUSTED errors.
|
|
|
|
| 606 |
file_upload_obj: Previously uploaded file object (optional)
|
| 607 |
image_obj: Image or list of images (optional)
|
| 608 |
model_name: Primary model to use
|
| 609 |
+
fallback_model: First fallback model if primary fails
|
| 610 |
+
fallback_model_2: Second fallback model if first fallback fails
|
| 611 |
file_path: Local file path (needed for re-upload when rotating keys)
|
| 612 |
|
| 613 |
Returns textual response and prints progress.
|
|
|
|
| 684 |
if "429" in error_str or "RESOURCE_EXHAUSTED" in error_str:
|
| 685 |
print(f"⚠️ Quota exhausted for API key #{current_key_num} with model {model_name}")
|
| 686 |
|
| 687 |
+
# Try first fallback model with SAME API key
|
| 688 |
print(f"⚡ Trying fallback model {fallback_model} with same API key #{current_key_num}")
|
| 689 |
try:
|
| 690 |
response = current_client.models.generate_content(
|
|
|
|
| 699 |
error_fallback_str = str(e_fallback)
|
| 700 |
print(f"β Fallback model {fallback_model} also failed: {e_fallback}")
|
| 701 |
|
| 702 |
+
# Check if first fallback also exhausted
|
| 703 |
if "429" in error_fallback_str or "RESOURCE_EXHAUSTED" in error_fallback_str:
|
| 704 |
+
print(f"⚠️ First fallback model also exhausted for API key #{current_key_num}")
|
| 705 |
|
| 706 |
+
# Try second fallback model with SAME API key
|
| 707 |
+
print(f"⚡ Trying second fallback model {fallback_model_2} with same API key #{current_key_num}")
|
| 708 |
+
try:
|
| 709 |
+
response = current_client.models.generate_content(
|
| 710 |
+
model=fallback_model_2,
|
| 711 |
+
contents=contents
|
| 712 |
+
)
|
| 713 |
+
raw_text = response.text
|
| 714 |
+
print(f"π₯ Received response (chars): {len(raw_text)}")
|
| 715 |
+
client_manager.reset_to_primary()
|
| 716 |
+
return raw_text
|
| 717 |
+
except Exception as e_fallback_2:
|
| 718 |
+
error_fallback_2_str = str(e_fallback_2)
|
| 719 |
+
print(f"β Second fallback model {fallback_model_2} also failed: {e_fallback_2}")
|
| 720 |
+
|
| 721 |
+
# Check if second fallback also exhausted
|
| 722 |
+
if "429" in error_fallback_2_str or "RESOURCE_EXHAUSTED" in error_fallback_2_str:
|
| 723 |
+
print(f"⚠️ All 3 models exhausted for API key #{current_key_num}")
|
| 724 |
|
| 725 |
+
# Now try next API key if available
|
| 726 |
+
if attempt < max_attempts - 1:
|
| 727 |
+
# Check if we have file uploads and can re-upload
|
| 728 |
+
if file_upload_obj and file_path:
|
| 729 |
+
print(f"π Rotating to next API key and re-uploading file...")
|
| 730 |
+
client_manager.rotate_to_next_key()
|
| 731 |
+
|
| 732 |
+
# Re-upload file with new API key
|
| 733 |
+
try:
|
| 734 |
+
print(f"π€ Re-uploading file with API key #{client_manager.current_key_index + 1}...")
|
| 735 |
+
current_file_obj = upload_to_gemini(file_path)
|
| 736 |
+
print(f"✅ File re-uploaded successfully")
|
| 737 |
+
except Exception as upload_error:
|
| 738 |
+
print(f"β Failed to re-upload file: {upload_error}")
|
| 739 |
+
raise Exception(f"Failed to re-upload file with new API key: {upload_error}")
|
| 740 |
+
|
| 741 |
+
attempt += 1
|
| 742 |
+
print(f"π Retrying with next API key (attempt {attempt + 1}/{max_attempts})...")
|
| 743 |
+
continue
|
| 744 |
+
elif file_upload_obj and not file_path:
|
| 745 |
+
print("⚠️ WARNING: Cannot rotate API keys - file_path not provided for re-upload!")
|
| 746 |
+
print(" To enable API key rotation with file uploads, pass file_path parameter.")
|
| 747 |
+
raise Exception(f"All 3 models exhausted for API key #{current_key_num}. Cannot rotate without file_path.")
|
| 748 |
+
else:
|
| 749 |
+
# No file uploads, safe to rotate
|
| 750 |
+
client_manager.rotate_to_next_key()
|
| 751 |
+
attempt += 1
|
| 752 |
+
print(f"π Trying next API key (attempt {attempt + 1}/{max_attempts})...")
|
| 753 |
+
continue
|
| 754 |
+
else:
|
| 755 |
+
raise Exception(f"All {max_attempts} API key(s) exhausted with all 3 models.")
|
| 756 |
else:
|
| 757 |
+
# Second fallback failed with different error
|
| 758 |
+
raise Exception(f"Second fallback model failed: {e_fallback_2}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 759 |
else:
|
| 760 |
+
# First fallback failed with different error
|
| 761 |
+
raise Exception(f"First fallback model failed: {e_fallback}")
|
| 762 |
|
| 763 |
elif "403" in error_str or "PERMISSION_DENIED" in error_str:
|
| 764 |
# This happens when trying to access a file uploaded with a different API key
|
|
|
|
| 783 |
raise Exception(f"File access denied. Cannot re-upload without file_path. Error: {e}")
|
| 784 |
|
| 785 |
else:
|
| 786 |
+
# Other error - try fallback models with same key
|
| 787 |
print(f"⚡ Trying fallback model {fallback_model} with same API key #{current_key_num}")
|
| 788 |
try:
|
| 789 |
response = current_client.models.generate_content(
|
|
|
|
| 795 |
client_manager.reset_to_primary()
|
| 796 |
return raw_text
|
| 797 |
except Exception as e2:
|
| 798 |
+
print(f"β First fallback also failed: {e2}")
|
| 799 |
|
| 800 |
+
# Try second fallback
|
| 801 |
+
print(f"⚡ Trying second fallback model {fallback_model_2} with same API key #{current_key_num}")
|
| 802 |
+
try:
|
| 803 |
+
response = current_client.models.generate_content(
|
| 804 |
+
model=fallback_model_2,
|
| 805 |
+
contents=contents
|
| 806 |
+
)
|
| 807 |
+
raw_text = response.text
|
| 808 |
+
print(f"π₯ Received response (chars): {len(raw_text)}")
|
| 809 |
+
client_manager.reset_to_primary()
|
| 810 |
+
return raw_text
|
| 811 |
+
except Exception as e3:
|
| 812 |
+
print(f"β Second fallback also failed: {e3}")
|
| 813 |
+
|
| 814 |
+
# If we have more keys, try them
|
| 815 |
+
if attempt < max_attempts - 1:
|
| 816 |
+
if file_upload_obj and file_path:
|
| 817 |
+
print(f"π Rotating to next API key and re-uploading file...")
|
| 818 |
+
client_manager.rotate_to_next_key()
|
| 819 |
+
|
| 820 |
+
try:
|
| 821 |
+
print(f"π€ Re-uploading file with API key #{client_manager.current_key_index + 1}...")
|
| 822 |
+
current_file_obj = upload_to_gemini(file_path)
|
| 823 |
+
print(f"✅ File re-uploaded successfully")
|
| 824 |
+
except Exception as upload_error:
|
| 825 |
+
print(f"β Failed to re-upload file: {upload_error}")
|
| 826 |
+
raise Exception(f"Failed to re-upload file with new API key: {upload_error}")
|
| 827 |
+
|
| 828 |
+
attempt += 1
|
| 829 |
+
print(f"π Retrying with next API key (attempt {attempt + 1}/{max_attempts})...")
|
| 830 |
+
continue
|
| 831 |
+
elif file_upload_obj and not file_path:
|
| 832 |
+
raise Exception(f"All models failed. Cannot rotate keys without file_path. Last error: {e3}")
|
| 833 |
+
else:
|
| 834 |
+
client_manager.rotate_to_next_key()
|
| 835 |
+
attempt += 1
|
| 836 |
+
print(f"π Trying next API key (attempt {attempt + 1}/{max_attempts})...")
|
| 837 |
+
continue
|
| 838 |
else:
|
| 839 |
+
raise Exception(f"All attempts failed. Last error: {e3}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 840 |
|
| 841 |
# If we exhausted all attempts
|
| 842 |
raise Exception(f"β All {max_attempts} API key(s) exhausted. Please check your quota or try again later.")
|
|
|
|
| 844 |
|
| 845 |
|
| 846 |
|
| 847 |
+
|
| 848 |
# ---------------- PARSERS ----------------
|
| 849 |
def extract_question_ids_from_qpms(text: str):
|
| 850 |
"""Extract question IDs from QP+MS transcript."""
|
|
|
|
| 1400 |
|
| 1401 |
print("1.i) Transcribing QP+MS (questions first, then full markscheme, with graph detection)...")
|
| 1402 |
qpms_prompt = QP_MS_TRANSCRIPTION_PROMPT["content"] + "\nAt the end, also list all questions in the markscheme where a graph is expected, in the format:\nGraph expected in:\n- Question <number> β Page <number>\n(One per line, after ==== MARKSCHEME END ====)"
|
| 1403 |
+
qpms_text = gemini_generate_content(qpms_prompt, file_upload_obj=merged_uploaded, model_name="gemini-2.5-flash", fallback_model="gemini-2.5-flash-preview-09-2025", fallback_model_2="gemini-2.5-flash-lite", file_path=merged_qpms_path)
|
| 1404 |
print("π QP+MS transcription received. Saving debug file: debug_qpms_transcript.txt")
|
| 1405 |
with open("debug_qpms_transcript.txt", "w", encoding="utf-8") as f:
|
| 1406 |
f.write(qpms_text)
|
|
|
|
| 1418 |
|
| 1419 |
print("1.ii) Building AS transcription prompt with expected question IDs and graph detection, sending to Gemini...")
|
| 1420 |
as_prompt = build_as_cot_prompt_with_expected_ids(extracted_ids, qpms_text) + "\nAt the end, also list all answers where a graph is found, in the format:\nGraph found in:\n- Answer <number> β Page <number>\n(One per line, after all answers)"
|
| 1421 |
+
as_text = gemini_generate_content(as_prompt, file_upload_obj=ans_uploaded, model_name="gemini-2.5-flash", fallback_model="gemini-2.5-flash-preview-09-2025", fallback_model_2="gemini-2.5-flash-lite", file_path=ans_path)
|
| 1422 |
print("π AS transcription received. Saving debug file: debug_as_transcript.txt")
|
| 1423 |
with open("debug_as_transcript.txt", "w", encoding="utf-8") as f:
|
| 1424 |
f.write(as_text)
|