Spaces:
Running
Running
Commit
•
f9f2797
1
Parent(s):
15159c9
Upload folder using huggingface_hub
Browse files
README.md
CHANGED
@@ -1,12 +1,12 @@
|
|
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
sdk_version: 3.44.4
|
8 |
-
app_file:
|
9 |
pinned: false
|
|
|
10 |
---
|
11 |
-
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
+
|
2 |
---
|
3 |
+
title: stream_asr
|
4 |
+
emoji: 🔥
|
5 |
+
colorFrom: indigo
|
6 |
+
colorTo: indigo
|
7 |
sdk: gradio
|
8 |
sdk_version: 3.44.4
|
9 |
+
app_file: run.py
|
10 |
pinned: false
|
11 |
+
hf_oauth: true
|
12 |
---
|
|
|
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
torch
|
2 |
+
torchaudio
|
3 |
+
transformers
|
run.ipynb
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"cells": [{"cell_type": "markdown", "id": 302934307671667531413257853548643485645, "metadata": {}, "source": ["# Gradio Demo: stream_asr"]}, {"cell_type": "code", "execution_count": null, "id": 272996653310673477252411125948039410165, "metadata": {}, "outputs": [], "source": ["!pip install -q gradio torch torchaudio transformers"]}, {"cell_type": "code", "execution_count": null, "id": 288918539441861185822528903084949547379, "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "from transformers import pipeline\n", "import numpy as np\n", "\n", "transcriber = pipeline(\"automatic-speech-recognition\", model=\"openai/whisper-base.en\")\n", "\n", "def transcribe(stream, new_chunk):\n", " sr, y = new_chunk\n", " y = y.astype(np.float32)\n", " y /= np.max(np.abs(y))\n", "\n", " if stream is not None:\n", " stream = np.concatenate([stream, y])\n", " else:\n", " stream = y\n", " return stream, transcriber({\"sampling_rate\": sr, \"raw\": stream})[\"text\"]\n", "\n", "\n", "demo = gr.Interface(\n", " transcribe,\n", " [\"state\", gr.Audio(source=\"microphone\", streaming=True)],\n", " [\"state\", \"text\"],\n", " live=True,\n", ")\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
|
run.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from transformers import pipeline
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
# Module-level speech-recognition pipeline, built once at import time and
# reused by every call to transcribe().
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base.en",
)
|
6 |
+
|
7 |
+
def transcribe(stream, new_chunk):
    """Append a microphone chunk to the running audio and transcribe all of it.

    Args:
        stream: Audio accumulated by earlier calls (a NumPy array), or
            ``None`` when no audio has been accumulated yet.
        new_chunk: ``(sampling_rate, samples)`` pair, unpacked as ``sr, y``;
            ``samples`` is a NumPy array.

    Returns:
        A ``(stream, text)`` tuple: the updated accumulated audio, and the
        ``"text"`` field of the transcriber's result for that whole audio.
    """
    sr, y = new_chunk

    # The accumulated stream is concatenated as 1-D audio and handed to the
    # ASR pipeline, so collapse multi-channel input to mono first.
    # NOTE(review): assumes a (samples, channels) layout -- confirm against
    # the audio component in use.
    if y.ndim > 1:
        y = y.mean(axis=1)

    y = y.astype(np.float32)

    # Peak-normalise the chunk. A silent chunk has a peak of 0 and an empty
    # chunk has no peak at all; dividing in either case would raise or fill
    # the stream with NaN/inf, so such chunks are appended unscaled.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y /= peak

    if stream is not None:
        stream = np.concatenate([stream, y])
    else:
        stream = y
    return stream, transcriber({"sampling_rate": sr, "raw": stream})["text"]
|
17 |
+
|
18 |
+
|
19 |
+
# Wire transcribe() into a live interface: the "state" slot carries the
# accumulated audio between calls, the streaming microphone supplies each
# new chunk, and the transcript is shown as text.
demo = gr.Interface(
    fn=transcribe,
    inputs=["state", gr.Audio(source="microphone", streaming=True)],
    outputs=["state", "text"],
    live=True,
)

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|