Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -3,8 +3,6 @@ from kokoro import KPipeline
|
|
3 |
import soundfile as sf
|
4 |
import io
|
5 |
import os
|
6 |
-
import speech_recognition as sr
|
7 |
-
import romkan # For Japanese Romanization
|
8 |
|
9 |
# Install espeak-ng if not installed
|
10 |
if not os.system("which espeak-ng"):
|
@@ -85,26 +83,6 @@ def generate_audio(text, lang_code, voice, speed):
|
|
85 |
buffer.seek(0)
|
86 |
return buffer
|
87 |
|
88 |
-
# Transcribe the generated audio using speech recognition
|
89 |
-
def transcribe_audio(audio_buffer):
|
90 |
-
recognizer = sr.Recognizer()
|
91 |
-
with sr.AudioFile(audio_buffer) as source:
|
92 |
-
audio = recognizer.record(source)
|
93 |
-
try:
|
94 |
-
# Transcribe using Google Web Speech API (requires internet)
|
95 |
-
text = recognizer.recognize_google(audio)
|
96 |
-
return text
|
97 |
-
except sr.UnknownValueError:
|
98 |
-
return "Sorry, could not understand the audio"
|
99 |
-
except sr.RequestError as e:
|
100 |
-
return f"Request error from Google Speech Recognition service; {e}"
|
101 |
-
|
102 |
-
# Romanize (convert to Romaji) if the language is Japanese
|
103 |
-
def romanize_text(text, lang_code):
|
104 |
-
if lang_code == 'j': # Japanese language code
|
105 |
-
return romkan.to_roma(text) # Correct method to use
|
106 |
-
return text # No need for Romanization for other languages
|
107 |
-
|
108 |
# Generate and display the audio file
|
109 |
if st.button('Generate Audio'):
|
110 |
st.write("Generating speech...")
|
@@ -113,20 +91,10 @@ if st.button('Generate Audio'):
|
|
113 |
# Display Audio player in the app
|
114 |
st.audio(audio_buffer, format='audio/wav')
|
115 |
|
116 |
-
# Transcribe the generated speech to text
|
117 |
-
transcription = transcribe_audio(audio_buffer)
|
118 |
-
|
119 |
-
# Romanize the transcription if it's Japanese
|
120 |
-
romanized_text = romanize_text(transcription, lang_code)
|
121 |
-
|
122 |
-
# Display the transcribed and Romanized text
|
123 |
-
st.write("Transcribed Text: ", transcription)
|
124 |
-
st.write("Romanized Pronunciation: ", romanized_text)
|
125 |
-
|
126 |
# Optional: Save the generated audio file for download
|
127 |
st.download_button(
|
128 |
label="Download Audio",
|
129 |
data=audio_buffer,
|
130 |
file_name="generated_speech.wav",
|
131 |
mime="audio/wav"
|
132 |
-
)
|
|
|
3 |
import soundfile as sf
|
4 |
import io
|
5 |
import os
|
|
|
|
|
6 |
|
7 |
# Install espeak-ng if not installed
|
8 |
if not os.system("which espeak-ng"):
|
|
|
83 |
buffer.seek(0)
|
84 |
return buffer
|
85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
# Generate and display the audio file
|
87 |
if st.button('Generate Audio'):
|
88 |
st.write("Generating speech...")
|
|
|
91 |
# Display Audio player in the app
|
92 |
st.audio(audio_buffer, format='audio/wav')
|
93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
# Optional: Save the generated audio file for download
|
95 |
st.download_button(
|
96 |
label="Download Audio",
|
97 |
data=audio_buffer,
|
98 |
file_name="generated_speech.wav",
|
99 |
mime="audio/wav"
|
100 |
+
)
|