Spaces:

bofenghuang
/

speech-to-text

Running

bofenghuang committed on Nov 4, 2022

Commit

f0b2cfd

1 Parent(s): d925c7e

switch to non-streaming mode

Files changed (5) hide show

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-title: Realtime ASR in French
 emoji: 👂
 colorFrom: green
 colorTo: indigo

 ---
+title: Speech-to-Text in French
 emoji: 👂
 colorFrom: green
 colorTo: indigo

app.py DELETED Viewed

@@ -1,28 +0,0 @@
-from transformers import pipeline
-import gradio as gr
-pipe = pipeline(model="bhuang/wav2vec2-xls-r-1b-cv9-fr")
-def transcribe(audio, state=""):
-    text = pipe(audio, chunk_length_s=5, stride_length_s=1)["text"]
-    state += text + " "
-    return state, state
-# streaming mode
-iface = gr.Interface(
-    fn=transcribe,
-    inputs=[
-        gr.Audio(source="microphone", type="filepath", streaming=True, label="Record something..."),
-        "state"
-    ],
-    outputs=[
-        "textbox",
-        "state"
-    ],
-    title="Realtime ASR in French",
-    # description="Realtime demo for French ASR using a fine-tuned wav2vec2 model.",
-    allow_flagging="never",
-    live=True
-)
-iface.launch()

app.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ run_demo.py

run_demo.py ADDED Viewed

+from transformers import pipeline
+import gradio as gr
+pipe = pipeline(model="bhuang/wav2vec2-xls-r-1b-cv9-fr")
+def transcribe(audio):
+    # text = pipe(audio, chunk_length_s=30, stride_length_s=5)["text"]
+    text = pipe(audio)["text"]
+    return text
+iface = gr.Interface(
+    fn=transcribe,
+    inputs=gr.Audio(source="microphone", type="filepath", label="Record something..."),
+    outputs="text",
+    title="Speech-to-Text in French",
+    description="Realtime demo for French automatic speech recognition.",
+    allow_flagging="never",
+)
+iface.launch()

run_demo_streaming.py ADDED Viewed

+from transformers import pipeline
+import gradio as gr
+pipe = pipeline(model="bhuang/wav2vec2-xls-r-1b-cv9-fr")
+def transcribe(audio, state=""):
+    text = pipe(audio, chunk_length_s=5, stride_length_s=1)["text"]
+    state += text + " "
+    return state, state
+# streaming mode
+iface = gr.Interface(
+    fn=transcribe,
+    inputs=[gr.Audio(source="microphone", type="filepath", streaming=True, label="Record something..."), "state"],
+    outputs=["textbox", "state"],
+    title="Realtime Speech-to-Text in French",
+    description="Realtime demo for French automatic speech recognition.",
+    allow_flagging="never",
+    live=True,
+)
+iface.launch()