Spaces:
Build error
Build error
ss
Browse files
app.py
CHANGED
@@ -13,17 +13,39 @@ model = whisper.load_model("base")
|
|
13 |
API_URL = "https://api-inference.huggingface.co/models/bigscience/bloom"
|
14 |
HF_TOKEN = os.environ["HF_TOKEN"]
|
15 |
headers = {"Authorization": f"Bearer {HF_TOKEN}"}
|
|
|
16 |
|
17 |
# Text-to-Speech
|
18 |
LANGUAGES = list(CoquiTTS.langs.keys())
|
19 |
coquiTTS = CoquiTTS()
|
20 |
|
21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
# Processing input Audio
|
23 |
-
def
|
|
|
24 |
text1 = model.transcribe(audio)["text"]
|
25 |
text2 = lang_model_response(text1)
|
26 |
-
speech = tts(text,
|
27 |
return text1, text2, speech
|
28 |
|
29 |
def lang_model_response(prompt):
|
@@ -69,7 +91,7 @@ demo = gr.Interface(fn=tts, inputs=inputs, outputs=outputs)
|
|
69 |
demo.launch()
|
70 |
gr.Interface(
|
71 |
title = 'Testing Whisper',
|
72 |
-
fn=
|
73 |
inputs=[
|
74 |
gr.Audio(source="microphone", type="filepath"), #streaming = True,
|
75 |
# "state"
|
|
|
13 |
API_URL = "https://api-inference.huggingface.co/models/bigscience/bloom"
|
14 |
HF_TOKEN = os.environ["HF_TOKEN"]
|
15 |
headers = {"Authorization": f"Bearer {HF_TOKEN}"}
|
16 |
+
# Languages: English, French, Spanish, Arabic, Hindi, Portuguese, Indonesian, Vietnamese, Chinese, Tamil, Telugu, Bengali
|
17 |
|
18 |
# Text-to-Speech
|
19 |
LANGUAGES = list(CoquiTTS.langs.keys())
|
20 |
coquiTTS = CoquiTTS()
|
21 |
|
22 |
|
23 |
+
def whisper_stt(audio):
    """Transcribe an audio input with the module-level Whisper ``model``.

    The audio is loaded, padded or trimmed, turned into a log-Mel
    spectrogram on the model's device, and decoded with default decoding
    options. The detected language and the transcript are printed along
    the way.

    Args:
        audio: Value handed to ``whisper.load_audio`` (presumably a path
            to an audio file -- confirm against the caller).

    Returns:
        The ``text`` attribute of the decoding result.
    """
    # Load the audio, then pad/trim it to fit 30 seconds.
    samples = whisper.pad_or_trim(whisper.load_audio(audio))

    # Build the log-Mel spectrogram on the same device as the model.
    spectrogram = whisper.log_mel_spectrogram(samples).to(model.device)

    # Language detection: only the per-language probabilities are used.
    _, probs = model.detect_language(spectrogram)
    print(f"Detected language: {max(probs, key=probs.get)}")

    # Decode with default options and report the recognized text.
    result = whisper.decode(model, spectrogram, whisper.DecodingOptions())
    print(f"transcript is : {result.text}")
    return result.text
|
42 |
+
|
43 |
# Processing input Audio
|
44 |
+
def fun_engine(audio):
    """Run the speech -> text -> language model -> speech pipeline.

    Args:
        audio: Audio input forwarded unchanged to ``model.transcribe``.
            The Gradio interface wired to this function supplies a
            microphone recording as a file path.

    Returns:
        A ``(text1, text2, speech)`` tuple: the Whisper transcript, the
        value returned by ``lang_model_response`` for that transcript,
        and the value returned by ``tts`` for that response.
    """
    # whisper_stt(audio) is an alternative transcription path; it is
    # currently disabled in favour of model.transcribe.
    text1 = model.transcribe(audio)["text"]
    text2 = lang_model_response(text1)
    # Synthesize the language model's reply. `text2` is the only response
    # text bound in this scope; 'en' is presumably the English language
    # code expected by tts -- confirm against its definition.
    speech = tts(text2, 'en')
    return text1, text2, speech
|
50 |
|
51 |
def lang_model_response(prompt):
|
|
|
91 |
demo.launch()
|
92 |
gr.Interface(
|
93 |
title = 'Testing Whisper',
|
94 |
+
fn=fun_engine,
|
95 |
inputs=[
|
96 |
gr.Audio(source="microphone", type="filepath"), #streaming = True,
|
97 |
# "state"
|