Spaces:
Runtime error
Runtime error
Steven Zhang
committed on
Commit
•
a194ba7
1
Parent(s):
0711ae3
accept wav file only
Browse files
- AudioToText/condensedmodel.py +2 -5
- app.py +4 -3
AudioToText/condensedmodel.py
CHANGED
@@ -22,8 +22,6 @@ from tensorflow import keras
|
|
22 |
from keras import layers
|
23 |
import librosa
|
24 |
import speech_recognition as sr
|
25 |
-
import subprocess
|
26 |
-
import os
|
27 |
|
28 |
# MODEL LOSS
|
29 |
def CTCLoss(y_true, y_pred):
|
@@ -135,9 +133,8 @@ def loadWeights():
|
|
135 |
|
136 |
# Load CKPT to Model
|
137 |
model.load_weights(output)
|
138 |
-
s
|
139 |
def load_wav(filename):
|
140 |
-
wav,_ = librosa.load(
|
141 |
|
142 |
audio = tf.convert_to_tensor(
|
143 |
wav,
|
@@ -205,7 +202,7 @@ def AudioToTextUsingModel(wav_file):
|
|
205 |
return output_text
|
206 |
|
207 |
def AudioToTextUsingAPI(audio_file):
|
208 |
-
AUDIO_FILE =
|
209 |
|
210 |
# use the audio file as the audio source
|
211 |
|
22 |
from keras import layers
|
23 |
import librosa
|
24 |
import speech_recognition as sr
|
|
|
|
|
25 |
|
26 |
# MODEL LOSS
|
27 |
def CTCLoss(y_true, y_pred):
|
133 |
|
134 |
# Load CKPT to Model
|
135 |
model.load_weights(output)
|
|
|
136 |
def load_wav(filename):
|
137 |
+
wav,_ = librosa.load(filename, sr = 22050)
|
138 |
|
139 |
audio = tf.convert_to_tensor(
|
140 |
wav,
|
202 |
return output_text
|
203 |
|
204 |
def AudioToTextUsingAPI(audio_file):
|
205 |
+
AUDIO_FILE = audio_file
|
206 |
|
207 |
# use the audio file as the audio source
|
208 |
|
app.py
CHANGED
@@ -7,7 +7,7 @@ from AudioToText.condensedmodel import AudioToTextUsingAPI
|
|
7 |
from AudioToText.condensedmodel import AudioToTextUsingModel
|
8 |
|
9 |
|
10 |
-
st.title("Translation
|
11 |
|
12 |
option = st.selectbox("Select input type:", ("Text input", "Audio input"))
|
13 |
option2 = st.selectbox("Select translation language:", ("Spanish", "Chinese"))
|
@@ -25,9 +25,10 @@ if option == "Text input":
|
|
25 |
st.write(translated)
|
26 |
input_sentence = None
|
27 |
else:
|
28 |
-
wav_sentence = st.file_uploader("Upload
|
|
|
29 |
option3 = st.selectbox("Select audio to text model to use:", ("Our pretrained model", "Google API"))
|
30 |
-
if st.button("Submit
|
31 |
if option3 == "Our pretrained model":
|
32 |
input_list = AudioToTextUsingModel(wav_sentence)
|
33 |
input_sentence = "".join(input_list)
|
7 |
from AudioToText.condensedmodel import AudioToTextUsingModel
|
8 |
|
9 |
|
10 |
+
st.title("FIRE COML Summer 2022 Translation Model")
|
11 |
|
12 |
option = st.selectbox("Select input type:", ("Text input", "Audio input"))
|
13 |
option2 = st.selectbox("Select translation language:", ("Spanish", "Chinese"))
|
25 |
st.write(translated)
|
26 |
input_sentence = None
|
27 |
else:
|
28 |
+
wav_sentence = st.file_uploader("Upload an audio file (.wav):", type=\
|
29 |
+
["wav"])
|
30 |
option3 = st.selectbox("Select audio to text model to use:", ("Our pretrained model", "Google API"))
|
31 |
+
if st.button("Submit audio file"):
|
32 |
if option3 == "Our pretrained model":
|
33 |
input_list = AudioToTextUsingModel(wav_sentence)
|
34 |
input_sentence = "".join(input_list)
|