fix: update import statement for BaseModel in agent.py and add timing logs in speaking_controller.py
src/agents/evaluation/agent.py CHANGED
@@ -1,5 +1,5 @@
 from langchain_core.prompts import ChatPromptTemplate
-from
+from pydantic import BaseModel, Field
 from src.config.llm import model
 from src.utils.logger import logger
 from .prompt import evaluation_prompt
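The hunk above only shows the import fix; the rest of agent.py is not part of this diff. For context, a pydantic import like this in a LangChain agent module usually backs a structured-output schema. A minimal sketch of that pattern follows; EvaluationResult and its fields are hypothetical names that do not appear in the commit:

# Hypothetical sketch, not part of this commit: the usual reason an evaluation
# agent imports BaseModel/Field is to declare a structured-output schema.
from pydantic import BaseModel, Field


class EvaluationResult(BaseModel):  # hypothetical schema name
    score: float = Field(description="Overall score from 0 to 10")
    feedback: str = Field(description="Short, actionable feedback for the learner")


# Binding the schema to a chat model is the typical LangChain pattern, e.g.:
# chain = evaluation_prompt | model.with_structured_output(EvaluationResult)

Field descriptions become part of what the model sees when the schema is bound, so keeping them short and concrete tends to help.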
src/apis/controllers/speaking_controller.py CHANGED
@@ -17,8 +17,8 @@ from transformers import WhisperProcessor, WhisperForConditionalGeneration
 from optimum.onnxruntime import ORTModelForSpeechSeq2Seq
 from loguru import logger
 import onnxruntime
+import time
 
-warnings.filterwarnings("ignore")
 
 # Download required NLTK data
 try:
@@ -66,7 +66,8 @@ class WhisperASR:
         Returns transcript and confidence score
         """
         try:
-
+
+            start_time = time.time()
             audio, sr = librosa.load(audio_path, sr=self.sample_rate)
 
             # Process audio
@@ -95,7 +96,7 @@ class WhisperASR:
             # Convert to phoneme representation for comparison
             g2p = SimpleG2P()
             phoneme_representation = g2p.get_reference_phoneme_string(transcript)
-
+            logger.info(f"Whisper transcription time: {time.time() - start_time:.2f}s")
             return {
                 "character_transcript": transcript,
                 "phoneme_representation": phoneme_representation,
@@ -179,49 +180,7 @@ class Wav2Vec2CharacterASRONNX:
 
         except ImportError as e:
             print(f"Error importing Wav2Vec2ONNXConverter: {e}")
-
-            self._fallback_create_onnx_model(onnx_model_path, processor_name)
-
-        except Exception as e:
-            print(f"Error creating ONNX model: {e}")
-            # Try fallback method
-            self._fallback_create_onnx_model(onnx_model_path, processor_name)
-
-    def _fallback_create_onnx_model(self, onnx_model_path: str, processor_name: str):
-        """Fallback method to create ONNX model using basic torch.onnx.export"""
-        try:
-            print("Using fallback method to create ONNX model...")
-
-            # Load PyTorch model
-            model = Wav2Vec2ForCTC.from_pretrained(processor_name)
-            model.eval()
-
-            # Create dummy input
-            dummy_input = torch.randn(1, 160000, dtype=torch.float32)
-
-            # Export to ONNX
-            with torch.no_grad():
-                torch.onnx.export(
-                    model,
-                    dummy_input,
-                    onnx_model_path,
-                    input_names=["input_values"],
-                    output_names=["logits"],
-                    dynamic_axes={
-                        "input_values": {0: "batch_size", 1: "sequence_length"},
-                        "logits": {0: "batch_size", 1: "sequence_length"},
-                    },
-                    opset_version=14,
-                    do_constant_folding=True,
-                    verbose=False,
-                    export_params=True,
-                )
-
-            print(f"✓ Fallback ONNX model created at: {onnx_model_path}")
-
-        except Exception as e:
-            print(f"Fallback method also failed: {e}")
-            raise Exception(f"Could not create ONNX model: {e}")
+            raise e
 
     def transcribe_to_characters(self, audio_path: str) -> Dict:
         """
@@ -230,6 +189,7 @@ class Wav2Vec2CharacterASRONNX:
         """
         try:
             # Load audio
+            start_time = time.time()
             speech, sr = librosa.load(audio_path, sr=self.sample_rate)
 
             # Prepare input for ONNX
@@ -261,6 +221,9 @@ class Wav2Vec2CharacterASRONNX:
 
             # Calculate confidence scores
            confidence_scores = self._calculate_confidence_scores(logits)
+            logger.info(
+                f"Wav2Vec2 ONNX transcription time: {time.time() - start_time:.2f}s"
+            )
 
             return {
                 "character_transcript": character_transcript,
@@ -934,6 +897,7 @@ class SimplePronunciationAssessor:
             print("Step 1: Using Whisper transcription...")
             asr_result = self.whisper_asr.transcribe_to_text(audio_path)
             model_info = f"Whisper ({self.whisper_asr.model_name})"
+            print(f"Whisper ASR result: {asr_result}")
 
             character_transcript = asr_result["character_transcript"]
             phoneme_representation = asr_result["phoneme_representation"]
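Both timing additions follow the same pattern: record time.time() at the start of the transcription method and log the elapsed seconds through loguru once decoding and confidence scoring are done. Purely as an illustration (not part of this commit), that pattern can be factored into a small context manager; log_duration is a hypothetical name, and the logger is assumed to be the same loguru instance already imported in the controller:

# Hypothetical helper, not in this commit. Wraps the start_time / logger.info
# pattern that the diff repeats in transcribe_to_text and transcribe_to_characters.
import time
from contextlib import contextmanager

from loguru import logger


@contextmanager
def log_duration(label: str):
    """Log how long the wrapped block took, in seconds."""
    start_time = time.perf_counter()
    try:
        yield
    finally:
        logger.info(f"{label} time: {time.perf_counter() - start_time:.2f}s")


# Illustrative usage:
# with log_duration("Whisper transcription"):
#     audio, sr = librosa.load(audio_path, sr=16000)
#     ...

The sketch uses time.perf_counter() rather than time.time() because it is monotonic and better suited to short durations; the commit's time.time() is fine for second-level log messages.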