mr2along committed on
Commit
23f5423
·
verified ·
1 Parent(s): 8620a09

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -33
app.py CHANGED
@@ -7,12 +7,33 @@ from gtts import gTTS
7
  import io
8
  from pydub import AudioSegment
9
  import time
10
- from underthesea import phonetic
11
 
12
  # Create audio directory if it doesn't exist
13
  if not os.path.exists('audio'):
14
  os.makedirs('audio')
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  # Step 1: Transcribe the audio file
17
  def transcribe_audio(audio):
18
  if audio is None:
@@ -40,7 +61,7 @@ def transcribe_audio(audio):
40
  audio_data = recognizer.record(source)
41
 
42
  try:
43
- transcription = recognizer.recognize_google(audio_data, language='vi-VN') # For Vietnamese
44
  return transcription
45
  except sr.UnknownValueError:
46
  return "Google Speech Recognition could not understand the audio"
@@ -57,28 +78,7 @@ def create_pronunciation_audio(word):
57
  except Exception as e:
58
  return f"Failed to create pronunciation audio: {e}"
59
 
60
- # Upload function to Hugging Face Space
61
- def upfilepath(local_filename):
62
- ts = time.time()
63
- upload_url = f"https://mr2along-speech-recognize.hf.space/gradio_api/upload?upload_id={ts}"
64
- files = {'files': open(local_filename, 'rb')}
65
-
66
- try:
67
- response = requests.post(upload_url, files=files, timeout=30) # Set timeout (e.g., 30 seconds)
68
-
69
- if response.status_code == 200:
70
- result = response.json()
71
- extracted_path = result[0]
72
- return extracted_path
73
- else:
74
- return None
75
-
76
- except requests.exceptions.Timeout:
77
- return "Request timed out. Please try again."
78
- except Exception as e:
79
- return f"An error occurred: {e}"
80
-
81
- # Step 3: Compare the transcribed text with the input paragraph
82
  def compare_texts(reference_text, transcribed_text):
83
  reference_words = reference_text.split()
84
  transcribed_words = transcribed_text.split()
@@ -100,6 +100,11 @@ def compare_texts(reference_text, transcribed_text):
100
 
101
  html_output += f"<strong>Quality Score:</strong> {similarity_score}%<br>"
102
  html_output += f"<strong>Transcribed Text:</strong> {transcribed_text}<br>"
 
 
 
 
 
103
  html_output += "<strong>Word Score List:</strong><br>"
104
 
105
  # Generate colored word score list
@@ -125,15 +130,12 @@ def compare_texts(reference_text, transcribed_text):
125
  for word, audio in incorrect_words_audios:
126
  suggestion = difflib.get_close_matches(word, reference_words, n=1)
127
  suggestion_text = f" (Did you mean: <em>{suggestion[0]}</em>?)" if suggestion else ""
128
- up_audio = upfilepath(audio)
129
- audio_src = f"https://mr2along-speech-recognize.hf.space/gradio_api/file={up_audio}"
130
  html_output += f'{word}: '
131
  html_output += f'<audio controls><source src="{audio_src}" type="audio/mpeg">Your browser does not support the audio tag.</audio>{suggestion_text}<br>'
132
 
133
- # Step 4: Vietnamese Phonetic Transcription
134
- phonetic_transcription = phonetic(reference_text)
135
- html_output += f"<br><strong>Phonetic Transcription (Vietnamese):</strong> {phonetic_transcription}<br>"
136
-
137
  return [html_output]
138
 
139
  # Step 4: Text-to-Speech Function
@@ -155,7 +157,7 @@ def gradio_function(paragraph, audio):
155
 
156
  # Return comparison result
157
  return comparison_result
158
-
159
  # Gradio Interface using the updated API
160
  interface = gr.Interface(
161
  fn=gradio_function,
@@ -164,8 +166,8 @@ interface = gr.Interface(
164
  gr.Audio(type="filepath", label="Record Audio")
165
  ],
166
  outputs=["html"],
167
- title="Speech Recognition Comparison with Phonetic Transcription",
168
- description="Input a paragraph, record your audio, and compare the transcription to the original text. Also, see phonetic transcription for Vietnamese."
169
  )
170
 
171
  # Gradio Interface for Text-to-Speech
 
7
  import io
8
  from pydub import AudioSegment
9
  import time
10
+ import pronouncing # Phonetic library
11
 
12
  # Create audio directory if it doesn't exist
13
  if not os.path.exists('audio'):
14
  os.makedirs('audio')
15
 
16
+ # Function to upload file to server
17
+ def upfilepath(local_filename):
18
+ ts = time.time()
19
+ upload_url = f"https://mr2along-speech-recognize.hf.space/gradio_api/upload?upload_id={ts}"
20
+ files = {'files': open(local_filename, 'rb')}
21
+
22
+ try:
23
+ response = requests.post(upload_url, files=files, timeout=30) # Set timeout (e.g., 30 seconds)
24
+
25
+ if response.status_code == 200:
26
+ result = response.json()
27
+ extracted_path = result[0]
28
+ return extracted_path
29
+ else:
30
+ return None
31
+
32
+ except requests.exceptions.Timeout:
33
+ return "Request timed out. Please try again."
34
+ except Exception as e:
35
+ return f"An error occurred: {e}"
36
+
37
  # Step 1: Transcribe the audio file
38
  def transcribe_audio(audio):
39
  if audio is None:
 
61
  audio_data = recognizer.record(source)
62
 
63
  try:
64
+ transcription = recognizer.recognize_google(audio_data)
65
  return transcription
66
  except sr.UnknownValueError:
67
  return "Google Speech Recognition could not understand the audio"
 
78
  except Exception as e:
79
  return f"Failed to create pronunciation audio: {e}"
80
 
81
+ # Step 3: Compare the transcribed text with the input paragraph and add phonetic transcription
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  def compare_texts(reference_text, transcribed_text):
83
  reference_words = reference_text.split()
84
  transcribed_words = transcribed_text.split()
 
100
 
101
  html_output += f"<strong>Quality Score:</strong> {similarity_score}%<br>"
102
  html_output += f"<strong>Transcribed Text:</strong> {transcribed_text}<br>"
103
+
104
+ # Add phonetic transcription for the entire sentence
105
+ phonetic_transcription = " ".join([pronouncing.phones_for_word(word)[0] if pronouncing.phones_for_word(word) else word for word in transcribed_words])
106
+ html_output += f"<strong>Phonetic Transcription:</strong> {phonetic_transcription}<br>"
107
+
108
  html_output += "<strong>Word Score List:</strong><br>"
109
 
110
  # Generate colored word score list
 
130
  for word, audio in incorrect_words_audios:
131
  suggestion = difflib.get_close_matches(word, reference_words, n=1)
132
  suggestion_text = f" (Did you mean: <em>{suggestion[0]}</em>?)" if suggestion else ""
133
+ up_audio=upfilepath(audio)
134
+ audio_src=f"https://mr2along-speech-recognize.hf.space/gradio_api/file={up_audio}"
135
  html_output += f'{word}: '
136
  html_output += f'<audio controls><source src="{audio_src}" type="audio/mpeg">Your browser does not support the audio tag.</audio>{suggestion_text}<br>'
137
 
138
+ # Return the final result with phonetic transcription
 
 
 
139
  return [html_output]
140
 
141
  # Step 4: Text-to-Speech Function
 
157
 
158
  # Return comparison result
159
  return comparison_result
160
+
161
  # Gradio Interface using the updated API
162
  interface = gr.Interface(
163
  fn=gradio_function,
 
166
  gr.Audio(type="filepath", label="Record Audio")
167
  ],
168
  outputs=["html"],
169
+ title="Speech Recognition Comparison",
170
+ description="Input a paragraph, record your audio, and compare the transcription to the original text."
171
  )
172
 
173
  # Gradio Interface for Text-to-Speech