Mgolo committed on
Commit
3991d1f
·
verified ·
1 Parent(s): 724395a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -42
app.py CHANGED
@@ -29,7 +29,6 @@ import chardet
29
  from transformers import pipeline, MarianTokenizer, AutoModelForSeq2SeqLM
30
  from huggingface_hub import login
31
  import base64
32
- import io
33
 
34
  # ================================
35
  # Configuration & Constants
@@ -101,6 +100,9 @@ GITHUB_REPO = "mgolomanta/Models_Evaluation"
101
  EVALUATION_FILE = "evaluation.csv"
102
  GITHUB_TOKEN = os.getenv("git_tk") # Set this in your environment variables
103
 
 
 
 
104
  # ================================
105
  # Logging Configuration
106
  # ================================
@@ -442,11 +444,14 @@ class AudioProcessor:
442
  # ================================
443
 
444
  class EvaluationService:
445
- """Handles evaluation submissions and GitHub storage."""
446
 
447
  @staticmethod
448
  def escape_csv_field(text):
449
  """Escape text for CSV format."""
 
 
 
450
  if '"' in text:
451
  text = text.replace('"', '""')
452
  if ',' in text or '"' in text or '\n' in text:
@@ -454,35 +459,47 @@ class EvaluationService:
454
  return text
455
 
456
  @staticmethod
457
- def get_github_file_sha() -> Optional[str]:
458
- """Get the SHA of the existing evaluation file on GitHub."""
459
- try:
460
- url = f"https://api.github.com/repos/{GITHUB_REPO}/contents/{EVALUATION_FILE}"
461
- headers = {"Authorization": f"token {GITHUB_TOKEN}"} if GITHUB_TOKEN else {}
462
- response = requests.get(url, headers=headers)
463
-
464
- if response.status_code == 200:
465
- return response.json().get("sha")
466
- return None
467
- except Exception as e:
468
- logger.error(f"Error getting file SHA: {e}")
469
- return None
470
 
471
  @staticmethod
472
- def read_existing_csv_content() -> str:
473
- """Read existing CSV content from GitHub."""
 
 
 
 
 
 
 
474
  try:
475
- url = f"https://api.github.com/repos/{GITHUB_REPO}/contents/{EVALUATION_FILE}"
476
- headers = {"Authorization": f"token {GITHUB_TOKEN}"} if GITHUB_TOKEN else {}
477
- response = requests.get(url, headers=headers)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
478
 
479
- if response.status_code == 200:
480
- content = response.json().get("content", "")
481
- return base64.b64decode(content).decode('utf-8')
482
- return ""
483
  except Exception as e:
484
- logger.error(f"Error reading existing CSV: {e}")
485
- return ""
486
 
487
  @staticmethod
488
  def save_evaluation_to_github(
@@ -494,7 +511,7 @@ class EvaluationService:
494
  correct_answer: Optional[str] = None
495
  ) -> str:
496
  """
497
- Save evaluation to GitHub CSV file.
498
 
499
  Args:
500
  source_lang: Source language name
@@ -508,19 +525,43 @@ class EvaluationService:
508
  Status message
509
  """
510
  try:
 
 
 
 
 
 
 
511
  # Escape fields for CSV
512
  source_lang_escaped = EvaluationService.escape_csv_field(source_lang)
513
  target_lang_escaped = EvaluationService.escape_csv_field(target_lang)
514
  user_input_escaped = EvaluationService.escape_csv_field(user_input)
515
  model_output_escaped = EvaluationService.escape_csv_field(model_output)
516
- notation_escaped = EvaluationService.escape_csv_field(notation if notation else "")
517
- correct_answer_escaped = EvaluationService.escape_csv_field(correct_answer if correct_answer else "")
518
 
519
  # Prepare the new evaluation data
520
  new_row = f"{source_lang_escaped},{target_lang_escaped},{user_input_escaped},{model_output_escaped},{notation_escaped},{correct_answer_escaped}\n"
521
 
522
- # Get existing content
523
- existing_content = EvaluationService.read_existing_csv_content()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
524
 
525
  # Check if file exists and has headers
526
  if existing_content.strip():
@@ -541,9 +582,6 @@ class EvaluationService:
541
  "Accept": "application/vnd.github.v3+json"
542
  }
543
 
544
- # Check if file exists to get SHA
545
- file_sha = EvaluationService.get_github_file_sha()
546
-
547
  # Prepare payload
548
  payload = {
549
  "message": "Add new evaluation",
@@ -561,11 +599,17 @@ class EvaluationService:
561
  return "✅ Evaluation submitted successfully to GitHub!"
562
  else:
563
  logger.error(f"GitHub API error: {response.status_code} - {response.text}")
564
- return f"❌ Error saving evaluation to GitHub: {response.status_code}"
 
 
 
565
 
566
  except Exception as e:
567
  logger.error(f"Failed to save evaluation to GitHub: {e}")
568
- return f"❌ Error saving evaluation: {str(e)}"
 
 
 
569
 
570
  # ================================
571
  # Main Application
@@ -628,10 +672,7 @@ class TranslationApp:
628
  notation: Optional[str],
629
  correct_answer: Optional[str]
630
  ) -> str:
631
- """Submit evaluation data to GitHub."""
632
- if not GITHUB_TOKEN:
633
- return "❌ GitHub token not configured. Please set GITHUB_TOKEN environment variable."
634
-
635
  if not user_input.strip() or not model_output.strip():
636
  return "⚠️ Please translate text before submitting evaluation."
637
 
@@ -844,9 +885,9 @@ def main():
844
  """Main application entry point."""
845
  # Check if GitHub token is set
846
  if not os.getenv("git_tk"):
847
- logger.warning("GITHUB_TOKEN environment variable not set. Evaluation submissions will fail.")
848
  print("⚠️ WARNING: GITHUB_TOKEN environment variable not set!")
849
- print(" Please set it to enable evaluation submissions to GitHub.")
850
 
851
  try:
852
  app = TranslationApp()
 
29
  from transformers import pipeline, MarianTokenizer, AutoModelForSeq2SeqLM
30
  from huggingface_hub import login
31
  import base64
 
32
 
33
  # ================================
34
  # Configuration & Constants
 
100
  EVALUATION_FILE = "evaluation.csv"
101
  GITHUB_TOKEN = os.getenv("git_tk") # Set this in your environment variables
102
 
103
+ # Local fallback file
104
+ LOCAL_EVALUATION_FILE = "evaluation.csv"
105
+
106
  # ================================
107
  # Logging Configuration
108
  # ================================
 
444
  # ================================
445
 
446
  class EvaluationService:
447
+ """Handles evaluation submissions with GitHub and local fallback."""
448
 
449
  @staticmethod
450
  def escape_csv_field(text):
451
  """Escape text for CSV format."""
452
+ if text is None:
453
+ return ""
454
+ text = str(text)
455
  if '"' in text:
456
  text = text.replace('"', '""')
457
  if ',' in text or '"' in text or '\n' in text:
 
459
  return text
460
 
461
  @staticmethod
462
+ def ensure_local_csv_exists():
463
+ """Ensure local CSV file exists with headers."""
464
+ if not os.path.exists(LOCAL_EVALUATION_FILE):
465
+ headers = "source_language_name,target_language_name,user_input,model_output,notation_value,correct_answer\n"
466
+ with open(LOCAL_EVALUATION_FILE, 'w', encoding='utf-8', newline='') as f:
467
+ f.write(headers)
 
 
 
 
 
 
 
468
 
469
  @staticmethod
470
+ def save_evaluation_locally(
471
+ source_lang: str,
472
+ target_lang: str,
473
+ user_input: str,
474
+ model_output: str,
475
+ notation: Optional[str] = None,
476
+ correct_answer: Optional[str] = None
477
+ ) -> str:
478
+ """Save evaluation to local CSV file."""
479
  try:
480
+ # Ensure file exists with headers
481
+ EvaluationService.ensure_local_csv_exists()
482
+
483
+ # Escape fields for CSV
484
+ source_lang_escaped = EvaluationService.escape_csv_field(source_lang)
485
+ target_lang_escaped = EvaluationService.escape_csv_field(target_lang)
486
+ user_input_escaped = EvaluationService.escape_csv_field(user_input)
487
+ model_output_escaped = EvaluationService.escape_csv_field(model_output)
488
+ notation_escaped = EvaluationService.escape_csv_field(notation)
489
+ correct_answer_escaped = EvaluationService.escape_csv_field(correct_answer)
490
+
491
+ # Prepare the new evaluation data
492
+ new_row = f"{source_lang_escaped},{target_lang_escaped},{user_input_escaped},{model_output_escaped},{notation_escaped},{correct_answer_escaped}\n"
493
+
494
+ # Append to file
495
+ with open(LOCAL_EVALUATION_FILE, 'a', encoding='utf-8', newline='') as f:
496
+ f.write(new_row)
497
+
498
+ return "✅ Evaluation saved locally!"
499
 
 
 
 
 
500
  except Exception as e:
501
+ logger.error(f"Failed to save evaluation locally: {e}")
502
+ return f"❌ Error saving evaluation locally: {str(e)}"
503
 
504
  @staticmethod
505
  def save_evaluation_to_github(
 
511
  correct_answer: Optional[str] = None
512
  ) -> str:
513
  """
514
+ Save evaluation to GitHub CSV file with fallback to local storage.
515
 
516
  Args:
517
  source_lang: Source language name
 
525
  Status message
526
  """
527
  try:
528
+ # First try to save to GitHub
529
+ if not GITHUB_TOKEN:
530
+ # Fallback to local if no token
531
+ return EvaluationService.save_evaluation_locally(
532
+ source_lang, target_lang, user_input, model_output, notation, correct_answer
533
+ )
534
+
535
  # Escape fields for CSV
536
  source_lang_escaped = EvaluationService.escape_csv_field(source_lang)
537
  target_lang_escaped = EvaluationService.escape_csv_field(target_lang)
538
  user_input_escaped = EvaluationService.escape_csv_field(user_input)
539
  model_output_escaped = EvaluationService.escape_csv_field(model_output)
540
+ notation_escaped = EvaluationService.escape_csv_field(notation)
541
+ correct_answer_escaped = EvaluationService.escape_csv_field(correct_answer)
542
 
543
  # Prepare the new evaluation data
544
  new_row = f"{source_lang_escaped},{target_lang_escaped},{user_input_escaped},{model_output_escaped},{notation_escaped},{correct_answer_escaped}\n"
545
 
546
+ # Try to read existing content from GitHub
547
+ existing_content = ""
548
+ file_sha = None
549
+
550
+ try:
551
+ url = f"https://api.github.com/repos/{GITHUB_REPO}/contents/{EVALUATION_FILE}"
552
+ headers = {
553
+ "Authorization": f"token {GITHUB_TOKEN}",
554
+ "Accept": "application/vnd.github.v3+json"
555
+ }
556
+ response = requests.get(url, headers=headers)
557
+
558
+ if response.status_code == 200:
559
+ file_data = response.json()
560
+ file_sha = file_data.get("sha")
561
+ content = file_data.get("content", "")
562
+ existing_content = base64.b64decode(content).decode('utf-8')
563
+ except Exception as e:
564
+ logger.warning(f"Could not read existing GitHub file: {e}")
565
 
566
  # Check if file exists and has headers
567
  if existing_content.strip():
 
582
  "Accept": "application/vnd.github.v3+json"
583
  }
584
 
 
 
 
585
  # Prepare payload
586
  payload = {
587
  "message": "Add new evaluation",
 
599
  return "✅ Evaluation submitted successfully to GitHub!"
600
  else:
601
  logger.error(f"GitHub API error: {response.status_code} - {response.text}")
602
+ # Fallback to local storage
603
+ return EvaluationService.save_evaluation_locally(
604
+ source_lang, target_lang, user_input, model_output, notation, correct_answer
605
+ )
606
 
607
  except Exception as e:
608
  logger.error(f"Failed to save evaluation to GitHub: {e}")
609
+ # Fallback to local storage
610
+ return EvaluationService.save_evaluation_locally(
611
+ source_lang, target_lang, user_input, model_output, notation, correct_answer
612
+ )
613
 
614
  # ================================
615
  # Main Application
 
672
  notation: Optional[str],
673
  correct_answer: Optional[str]
674
  ) -> str:
675
+ """Submit evaluation data."""
 
 
 
676
  if not user_input.strip() or not model_output.strip():
677
  return "⚠️ Please translate text before submitting evaluation."
678
 
 
885
  """Main application entry point."""
886
  # Check if GitHub token is set
887
  if not os.getenv("git_tk"):
888
+ logger.warning("GITHUB_TOKEN environment variable not set. Evaluations will be saved locally.")
889
  print("⚠️ WARNING: GITHUB_TOKEN environment variable not set!")
890
+ print(" Evaluations will be saved to local file only.")
891
 
892
  try:
893
  app = TranslationApp()