Spaces:

gradio
/

stream_asr

Running

App Files Community

freddyaboulton HF Staff committed on Sep 19, 2023

Commit

f9f2797

1 Parent(s): 15159c9

Upload folder using huggingface_hub

Browse files

Files changed (4) hide show

README.md +7 -7
requirements.txt +3 -0
run.ipynb +1 -0
run.py +27 -0

README.md CHANGED Viewed

@@ -1,12 +1,12 @@
 ---
-title: Stream Asr
-emoji: 🌍
-colorFrom: gray
-colorTo: green
 sdk: gradio
 sdk_version: 3.44.4
-app_file: app.py
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: stream_asr
+emoji: 🔥
+colorFrom: indigo
+colorTo: indigo
 sdk: gradio
 sdk_version: 3.44.4
+app_file: run.py
 pinned: false
+hf_oauth: true
 ---

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+torch
+torchaudio
+transformers

run.ipynb ADDED Viewed

	@@ -0,0 +1 @@

+ {"cells": [{"cell_type": "markdown", "id": 302934307671667531413257853548643485645, "metadata": {}, "source": ["# Gradio Demo: stream_asr"]}, {"cell_type": "code", "execution_count": null, "id": 272996653310673477252411125948039410165, "metadata": {}, "outputs": [], "source": ["!pip install -q gradio torch torchaudio transformers"]}, {"cell_type": "code", "execution_count": null, "id": 288918539441861185822528903084949547379, "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "from transformers import pipeline\n", "import numpy as np\n", "\n", "transcriber = pipeline(\"automatic-speech-recognition\", model=\"openai/whisper-base.en\")\n", "\n", "def transcribe(stream, new_chunk):\n", " sr, y = new_chunk\n", " y = y.astype(np.float32)\n", " y /= np.max(np.abs(y))\n", "\n", " if stream is not None:\n", " stream = np.concatenate([stream, y])\n", " else:\n", " stream = y\n", " return stream, transcriber({\"sampling_rate\": sr, \"raw\": stream})[\"text\"]\n", "\n", "\n", "demo = gr.Interface(\n", " transcribe,\n", " [\"state\", gr.Audio(source=\"microphone\", streaming=True)],\n", " [\"state\", \"text\"],\n", " live=True,\n", ")\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}

run.py ADDED Viewed

	@@ -0,0 +1,27 @@

+import gradio as gr
+from transformers import pipeline
+import numpy as np
+transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
+def transcribe(stream, new_chunk):
+    sr, y = new_chunk
+    y = y.astype(np.float32)
+    y /= np.max(np.abs(y))
+    if stream is not None:
+        stream = np.concatenate([stream, y])
+    else:
+        stream = y
+    return stream, transcriber({"sampling_rate": sr, "raw": stream})["text"]
+demo = gr.Interface(
+    transcribe,
+    ["state", gr.Audio(source="microphone", streaming=True)],
+    ["state", "text"],
+    live=True,
+)
+if __name__ == "__main__":
+    demo.launch()