# Hugging Face Space (status when this page was captured: Sleeping)
import gradio as gr | |
from transformers import pipeline | |
import torch | |
import numpy as np | |
# Run inference on the first CUDA GPU when torch reports one, else on the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Single speech-recognition pipeline shared by every tab of the app.
# NOTE(review): trust_remote_code=True lets code shipped in the model repo run
# locally; this looks unnecessary for a stock Wav2Vec2 checkpoint -- confirm
# before removing.
wav2_ft = pipeline(
    "automatic-speech-recognition",
    model="sanchit-gandhi/wav2vec2-large-tedlium",
    device=device,
    trust_remote_code=True,
)

# Top-level Blocks container that the page layout is rendered into.
app = gr.Blocks()
def inference(path):
    """Transcribe the audio file at *path* and return the recognised text.

    Args:
        path: Path of the audio file to transcribe, as handed over by the
            ``gr.Audio(type='filepath')`` inputs. May be ``None`` or empty
            when the user submits without recording or uploading anything.

    Returns:
        The transcription string, or ``""`` when there is no audio.
    """
    # Submitting with no audio yields no path; return an empty transcription
    # instead of letting the pipeline fail on a missing input.
    if not path:
        return ""

    # NOTE(review): max_new_tokens is a text-generation setting; presumably it
    # has no effect on this CTC model -- confirm before removing it.
    result = wav2_ft(
        path,
        max_new_tokens=256,
        chunk_length_s=30,  # long recordings are processed in 30 s windows
        batch_size=8,
    )
    return result["text"]
def transcribe(stream, new_chunk):
    """Append a streamed audio chunk to the running buffer and transcribe it.

    Args:
        stream: Samples accumulated by previous calls (the "state" value), or
            ``None`` on the first call.
        new_chunk: ``(sample_rate, samples)`` pair for the newest chunk, or
            ``None`` when no audio was delivered.

    Returns:
        A ``(stream, text)`` tuple: the updated buffer to keep as state and
        the transcription of everything buffered so far. ``text`` is ``""``
        when there is nothing to transcribe.
    """
    if new_chunk is None:
        # No new audio: keep the state untouched rather than failing on unpack.
        return stream, ""

    sr, y = new_chunk
    y = y.astype(np.float32)

    # Down-mix multi-channel audio so every buffered chunk is 1-D.
    # NOTE(review): assumes a (samples, channels) layout -- confirm.
    if y.ndim > 1:
        y = y.mean(axis=1)

    # Peak-normalise the chunk. An empty chunk has no peak and a silent chunk
    # has peak 0; dividing in either case would raise or fill the buffer with
    # NaNs, so normalisation is skipped.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y /= peak

    stream = y if stream is None else np.concatenate([stream, y])

    if stream.size == 0:
        # Nothing buffered yet, so there is nothing to send to the model.
        return stream, ""

    # The whole buffer is re-transcribed on every chunk.
    return stream, wav2_ft({"sampling_rate": sr, "raw": stream})["text"]
# "Record" tab: the user records through the microphone, then submits the
# finished file to `inference` for transcription.
# NOTE(review): the title emoji was restored from a mis-decoded byte sequence
# (it previously rendered as Greek letters); confirm it is the intended glyph.
mic_mode = gr.Interface(
    fn=inference,
    inputs=gr.Audio(
        sources=["microphone"],
        type="filepath",
        label="Record Your Lecture",
    ),
    outputs=gr.Textbox(label="Transcription Output"),
    title="🎙️ Recording & Transcribe",
    description="Record through your mic. When you're done, hit stop and wait a moment. Feel free to trim the recording. Then, hit Submit!",
    examples=[],
)
# "Upload" tab: the user uploads an existing audio file, which is passed to
# `inference` for transcription.
# NOTE(review): the title emoji was lost to a mis-decoded byte sequence (it
# previously rendered as a lone Greek pi); the folder glyph is a best-effort
# replacement -- confirm the intended one.
upload_mode = gr.Interface(
    fn=inference,
    inputs=gr.Audio(
        sources=["upload"],
        type="filepath",
        label="Upload Your Lecture Recording",
    ),
    outputs=gr.Textbox(label="Transcription Output"),
    title="📁 Upload & Transcribe",
    description="Have a recorded lecture? Upload the audio file here, and it'll be transcribed in seconds!",
)
# inspired by Gradio App Real Time Speech Recognition: https://www.gradio.app/guides/real-time-speech-recognition
# "Live" tab: microphone audio is streamed to `transcribe` chunk by chunk; the
# "state" input/output pair carries the accumulated samples between calls.
# NOTE(review): the title emoji was restored from a mis-decoded byte sequence;
# confirm it is the intended glyph.
live_mode = gr.Interface(
    fn=transcribe,
    inputs=["state", gr.Audio(sources=["microphone"], streaming=True)],
    outputs=["state", "text"],
    title="🎤 Live Transcription",
    description="Transcribe your lecture in real-time! Start speaking into your microphone, and watch the transcription appear instantly.",
    live=True,
)
# Landing-page copy rendered above the tabs.
# NOTE(review): the apostrophes and emoji in this text and in the tab labels
# below were restored from mis-decoded byte sequences. The apostrophes are
# unambiguous; the heading emoji and the "Upload" tab emoji are best-effort
# replacements -- confirm the intended glyphs.
_INTRO_MARKDOWN = """
# Lecture Transcription 📝
Welcome to **Lecture Transcription**, the go-to tool for transcribing lectures accurately. Whether you’re attending a live lecture or revisiting a recorded one, this app will ensure you don’t miss a single detail.
## How It Works
- **Recording Mode:** Record the lecture as it happens. When you stop, your transcription will be generated.
- **Upload Mode:** Upload your pre-recorded lecture audio files, and receive a precise transcription. Supports various audio formats including WAV, MP3, and more.
- **Live Mode:** That's right, low-latency live transcription!
## Optimized for Technical Oration
Under the hood, this is a Wav2Vec2 model fine-tuned on the TED-Lium dataset. It's well-versed for
accurately transcribing technical speech.
"""

# Assemble the page: the introduction first, then one tab per interface.
with app:
    gr.Markdown(_INTRO_MARKDOWN)
    gr.TabbedInterface(
        [mic_mode, upload_mode, live_mode],
        ["🎙️ Record & Transcribe", "📁 Upload & Transcribe", "🎤 Live Transcribe"],
    )

# Start the web server with debug output enabled.
app.launch(debug=True)