speech_recognize

Runtime error

App Files Community

mr2along committed on Oct 23, 2024

Commit

23f5423

verified ·

1 Parent(s): 8620a09

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -33

app.py CHANGED Viewed

@@ -7,12 +7,33 @@ from gtts import gTTS
 import io
 from pydub import AudioSegment
 import time
-from underthesea import phonetic
 # Create audio directory if it doesn't exist
 if not os.path.exists('audio'):
     os.makedirs('audio')
 # Step 1: Transcribe the audio file
 def transcribe_audio(audio):
     if audio is None:
@@ -40,7 +61,7 @@ def transcribe_audio(audio):
         audio_data = recognizer.record(source)
     try:
-        transcription = recognizer.recognize_google(audio_data, language='vi-VN')  # For Vietnamese
         return transcription
     except sr.UnknownValueError:
         return "Google Speech Recognition could not understand the audio"
@@ -57,28 +78,7 @@ def create_pronunciation_audio(word):
     except Exception as e:
         return f"Failed to create pronunciation audio: {e}"
-# Upload function to Hugging Face Space
-def upfilepath(local_filename):
-    ts = time.time()
-    upload_url = f"https://mr2along-speech-recognize.hf.space/gradio_api/upload?upload_id={ts}"
-    files = {'files': open(local_filename, 'rb')}
-    try:
-        response = requests.post(upload_url, files=files, timeout=30)  # Set timeout (e.g., 30 seconds)
-        if response.status_code == 200:
-            result = response.json()
-            extracted_path = result[0]
-            return extracted_path
-        else:
-            return None
-    except requests.exceptions.Timeout:
-        return "Request timed out. Please try again."
-    except Exception as e:
-        return f"An error occurred: {e}"
-# Step 3: Compare the transcribed text with the input paragraph
 def compare_texts(reference_text, transcribed_text):
     reference_words = reference_text.split()
     transcribed_words = transcribed_text.split()
@@ -100,6 +100,11 @@ def compare_texts(reference_text, transcribed_text):
     html_output += f"<strong>Quality Score:</strong> {similarity_score}%<br>"
     html_output += f"<strong>Transcribed Text:</strong> {transcribed_text}<br>"
     html_output += "<strong>Word Score List:</strong><br>"
     # Generate colored word score list
@@ -125,15 +130,12 @@ def compare_texts(reference_text, transcribed_text):
         for word, audio in incorrect_words_audios:
             suggestion = difflib.get_close_matches(word, reference_words, n=1)
             suggestion_text = f" (Did you mean: <em>{suggestion[0]}</em>?)" if suggestion else ""
-            up_audio = upfilepath(audio)
-            audio_src = f"https://mr2along-speech-recognize.hf.space/gradio_api/file={up_audio}"
             html_output += f'{word}: '
             html_output += f'<audio controls><source src="{audio_src}" type="audio/mpeg">Your browser does not support the audio tag.</audio>{suggestion_text}<br>'
-    # Step 4: Vietnamese Phonetic Transcription
-    phonetic_transcription = phonetic(reference_text)
-    html_output += f"<br><strong>Phonetic Transcription (Vietnamese):</strong> {phonetic_transcription}<br>"
     return [html_output]
 # Step 4: Text-to-Speech Function
@@ -155,7 +157,7 @@ def gradio_function(paragraph, audio):
     # Return comparison result
     return comparison_result
 # Gradio Interface using the updated API
 interface = gr.Interface(
     fn=gradio_function,
@@ -164,8 +166,8 @@ interface = gr.Interface(
         gr.Audio(type="filepath", label="Record Audio")
     ],
     outputs=["html"],
-    title="Speech Recognition Comparison with Phonetic Transcription",
-    description="Input a paragraph, record your audio, and compare the transcription to the original text. Also, see phonetic transcription for Vietnamese."
 )
 # Gradio Interface for Text-to-Speech

 import io
 from pydub import AudioSegment
 import time
+import pronouncing  # Phonetic library
 # Create audio directory if it doesn't exist
 if not os.path.exists('audio'):
     os.makedirs('audio')
+# Function to upload file to server
+def upfilepath(local_filename):
+    ts = time.time()
+    upload_url = f"https://mr2along-speech-recognize.hf.space/gradio_api/upload?upload_id={ts}"
+    files = {'files': open(local_filename, 'rb')}
+    try:
+        response = requests.post(upload_url, files=files, timeout=30)  # Set timeout (e.g., 30 seconds)
+        if response.status_code == 200:
+            result = response.json()
+            extracted_path = result[0]
+            return extracted_path
+        else:
+            return None
+    except requests.exceptions.Timeout:
+        return "Request timed out. Please try again."
+    except Exception as e:
+        return f"An error occurred: {e}"
 # Step 1: Transcribe the audio file
 def transcribe_audio(audio):
     if audio is None:
         audio_data = recognizer.record(source)
     try:
+        transcription = recognizer.recognize_google(audio_data)
         return transcription
     except sr.UnknownValueError:
         return "Google Speech Recognition could not understand the audio"
     except Exception as e:
         return f"Failed to create pronunciation audio: {e}"
+# Step 3: Compare the transcribed text with the input paragraph and add phonetic transcription
 def compare_texts(reference_text, transcribed_text):
     reference_words = reference_text.split()
     transcribed_words = transcribed_text.split()
     html_output += f"<strong>Quality Score:</strong> {similarity_score}%<br>"
     html_output += f"<strong>Transcribed Text:</strong> {transcribed_text}<br>"
+    # Add phonetic transcription for the entire sentence
+    phonetic_transcription = " ".join([pronouncing.phones_for_word(word)[0] if pronouncing.phones_for_word(word) else word for word in transcribed_words])
+    html_output += f"<strong>Phonetic Transcription:</strong> {phonetic_transcription}<br>"
     html_output += "<strong>Word Score List:</strong><br>"
     # Generate colored word score list
         for word, audio in incorrect_words_audios:
             suggestion = difflib.get_close_matches(word, reference_words, n=1)
             suggestion_text = f" (Did you mean: <em>{suggestion[0]}</em>?)" if suggestion else ""
+            up_audio=upfilepath(audio)
+            audio_src=f"https://mr2along-speech-recognize.hf.space/gradio_api/file={up_audio}"
             html_output += f'{word}: '
             html_output += f'<audio controls><source src="{audio_src}" type="audio/mpeg">Your browser does not support the audio tag.</audio>{suggestion_text}<br>'
+    # Return the final result with phonetic transcription
     return [html_output]
 # Step 4: Text-to-Speech Function
     # Return comparison result
     return comparison_result
 # Gradio Interface using the updated API
 interface = gr.Interface(
     fn=gradio_function,
         gr.Audio(type="filepath", label="Record Audio")
     ],
     outputs=["html"],
+    title="Speech Recognition Comparison",
+    description="Input a paragraph, record your audio, and compare the transcription to the original text."
 )
 # Gradio Interface for Text-to-Speech