fariedalfarizi committed on
Commit
7aaa8a4
·
1 Parent(s): c767135

Fix 3 issues: disable Whisper auto-correct, reduce timeout to 120s, preload model to prevent double-run

Browse files
Files changed (3) hide show
  1. app.py +3 -1
  2. app/interface.py +5 -3
  3. core/scoring_engine.py +4 -2
app.py CHANGED
@@ -17,8 +17,10 @@ from app.api_gradio import create_api_interface
17
  if __name__ == '__main__':
18
  print('Starting Vocal Articulation Assessment System v2.0...')
19
 
20
- # Initialize model once
 
21
  initialize_model()
 
22
 
23
  # Create UI and API interfaces
24
  ui_demo = create_interface()
 
17
  if __name__ == '__main__':
18
  print('Starting Vocal Articulation Assessment System v2.0...')
19
 
20
+ # Initialize model once at startup (prevents double-run issue)
21
+ print('Preloading Whisper model...')
22
  initialize_model()
23
+ print('Model preloaded successfully!')
24
 
25
  # Create UI and API interfaces
26
  ui_demo = create_interface()
app/interface.py CHANGED
@@ -66,7 +66,7 @@ def get_status_icon(score: float) -> str:
66
  # GRADIO INFERENCE FUNCTION
67
  # =======================================
68
 
69
- @spaces.GPU(duration=180) # Max ZeroGPU duration for Whisper Medium
70
  def score_vocal(
71
  audio_file: str,
72
  target_text: str,
@@ -74,8 +74,10 @@ def score_vocal(
74
  ) -> Tuple[str, str, Dict, str]:
75
  """Score vocal audio - with safe error handling"""
76
  try:
77
- # Initialize model
78
- scorer = initialize_model()
 
 
79
 
80
  # Validate input
81
  if audio_file is None:
 
66
  # GRADIO INFERENCE FUNCTION
67
  # =======================================
68
 
69
+ @spaces.GPU(duration=120) # Reduced from 180s to avoid timeout
70
  def score_vocal(
71
  audio_file: str,
72
  target_text: str,
 
74
  ) -> Tuple[str, str, Dict, str]:
75
  """Score vocal audio - with safe error handling"""
76
  try:
77
+ # Use global scorer (already initialized)
78
+ global scorer
79
+ if scorer is None:
80
+ scorer = initialize_model()
81
 
82
  # Validate input
83
  if audio_file is None:
core/scoring_engine.py CHANGED
@@ -261,8 +261,10 @@ class AdvancedVocalScoringSystem:
261
  "task": "transcribe",
262
  "temperature": 0.0, # Deterministic output
263
  "compression_ratio_threshold": 2.4, # Default value
264
- "logprob_threshold": -1.0, # Add this to prevent logprobs error
265
- "no_speech_threshold": 0.6 # Default value
 
 
266
  }
267
  )
268
  transcription = result["text"].upper().strip() if result and "text" in result else ""
 
261
  "task": "transcribe",
262
  "temperature": 0.0, # Deterministic output
263
  "compression_ratio_threshold": 2.4, # Default value
264
+ "logprob_threshold": -1.0, # Prevent logprobs error
265
+ "no_speech_threshold": 0.6, # Default value
266
+ "condition_on_previous_text": False, # Disable context (prevent hallucination)
267
+ "prompt_ids": None # No prompt = no auto-correction
268
  }
269
  )
270
  transcription = result["text"].upper().strip() if result and "text" in result else ""