Spaces:

srkvatsa
/

Lecture-Transcription

Sleeping

File size: 3,031 Bytes

dfa4663
4ca677f
ac1dae2
b6e138e
ac1dae2
dfa4663
4ca677f
dfa4663
ac1dae2
4ca677f
dfa4663
4ca677f
 
 
 
 
 
 
 
dfa4663
b6e138e
 
 
 
 
 
 
 
 
 
 
 
dfa4663
 
 
 
b6e138e
dfa4663
 
 
 
 
 
 
 
 
b6e138e
dfa4663
 
 
b6e138e
 
 
 
 
 
 
 
 
 
 
dfa4663
 
 
 
 
 
 
 
 
 
 
b6e138e
dfa4663
 
 
 
 
 
 
 
 
b6e138e
 
dfa4663
 
b6e138e

import gradio as gr
from transformers import pipeline
import torch
import numpy as np
# Prefer the first CUDA device when one is present; otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Speech-recognition pipeline used by both the file-based and the streaming
# transcription callbacks below.
# NOTE(review): trust_remote_code=True allows code shipped with the model
# repository to run at load time -- confirm this checkpoint really needs it.
wav2_ft = pipeline(
    task="automatic-speech-recognition",
    model="sanchit-gandhi/wav2vec2-large-tedlium",
    device=device,
    trust_remote_code=True,
)


# Top-level Blocks container; it is populated and launched at the end of the file.
app = gr.Blocks()

def inference(path):
    """Transcribe an audio file with the shared speech-recognition pipeline.

    Args:
        path: Filesystem path of the audio to transcribe. A falsy value
            (``None`` or ``""``) is treated as "no audio provided"; this is
            presumably what the UI passes when Submit is pressed without a
            recording or upload.

    Returns:
        The ``"text"`` field of the pipeline output, or an empty string when
        no audio was provided.
    """
    # Nothing to transcribe: skip the model call rather than letting the
    # pipeline fail on a missing input and surface a generic error.
    if not path:
        return ""

    out = wav2_ft(
        path,
        # NOTE(review): max_new_tokens is a generation setting; it likely has
        # no effect on a CTC model such as Wav2Vec2 -- kept for compatibility.
        max_new_tokens=256,
        chunk_length_s=30,
        batch_size=8,
    )
    return out["text"]


def transcribe(stream, new_chunk):
    """Append a new audio chunk to the running stream and re-transcribe it.

    Args:
        stream: Audio accumulated by previous calls (1-D float32 array), or
            ``None`` on the first call.
        new_chunk: ``(sampling_rate, samples)`` pair for the newest chunk.

    Returns:
        A ``(stream, text)`` tuple: the updated accumulated audio and the
        transcription of the whole stream. ``text`` is ``""`` while the
        stream is still empty.
    """
    sr, y = new_chunk
    # np.array (not asarray) so the in-place scaling below never touches the
    # caller's buffer, even if it is already float32.
    y = np.array(y, dtype=np.float32)

    # The pipeline expects single-channel audio. Multi-channel chunks are
    # assumed to be laid out as (samples, channels) -- TODO confirm against
    # the audio component's documentation -- and are averaged down to mono.
    if y.ndim > 1:
        y = y.mean(axis=1)

    # Peak-normalise the chunk. A silent (all-zero) or empty chunk has no
    # peak to scale by; dividing anyway would fill the stream with NaN (or
    # raise on an empty array), so such chunks are left untouched.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y /= peak

    stream = y if stream is None else np.concatenate([stream, y])

    # No audio accumulated yet: there is nothing to send to the model.
    if stream.size == 0:
        return stream, ""

    return stream, wav2_ft({"sampling_rate": sr, "raw": stream})["text"]

# Recording tab: capture audio from the microphone to a file, then hand the
# file path to `inference` when the user submits.
mic_mode = gr.Interface(
    title="🎙️ Recording & Transcribe",
    description="Record through your mic. When you're done, hit stop and wait a moment. Feel free to trim the recording. Then, hit Submit!",
    fn=inference,
    inputs=gr.Audio(
        label="Record Your Lecture",
        sources="microphone",
        type="filepath",
    ),
    outputs=gr.Textbox(label="Transcription Output"),
    examples=[],
)


# Upload tab: accept an existing audio file and pass its path to `inference`.
upload_mode = gr.Interface(
    title="📂 Upload & Transcribe",
    description="Have a recorded lecture? Upload the audio file here, and it'll be transcribed in seconds!",
    fn=inference,
    inputs=gr.Audio(
        label="Upload Your Lecture Recording",
        sources="upload",
        type="filepath",
    ),
    outputs=gr.Textbox(label="Transcription Output"),
)

# Live tab: streams microphone chunks into `transcribe`, carrying the audio
# accumulated so far between calls through the "state" input/output.
# Based on the Gradio real-time speech recognition guide:
# https://www.gradio.app/guides/real-time-speech-recognition
live_mode = gr.Interface(
    fn=transcribe,
    inputs=[
        "state",
        gr.Audio(sources=["microphone"], streaming=True),
    ],
    outputs=[
        "state",
        "text",
    ],
    live=True,
    title="🎤 Live Transcription",
    description="Transcribe your lecture in real-time! Start speaking into your microphone, and watch the transcription appear instantly.",
)



# Page layout: an introductory Markdown section followed by one tab per mode.
with app:
    gr.Markdown(
        value="""
        # Lecture Transcription 📝
        
        Welcome to **Lecture Transcription**, the go-to tool for transcribing lectures accurately. Whether you’re attending a live lecture or revisiting a recorded one, this app will ensure you don’t miss a single detail.

        ## How It Works
        - **Recording Mode:** Record the lecture as it happens. When you stop, your transcription will be generated.
        - **Upload Mode:** Upload your pre-recorded lecture audio files, and receive a precise transcription. Supports various audio formats including WAV, MP3, and more.
        - **Live Mode:** That's right, low-latency live transcription!

        ## Optimized for Technical Oration
        Under the hood, this is a Wav2Vec2 model fine-tuned on the TED-Lium dataset. It's well-versed for 
        accurately transcribing technical speech.
  
        
        """
    )
    # Tab order and tab labels are matched by position.
    gr.TabbedInterface(
        interface_list=[mic_mode, upload_mode, live_mode],
        tab_names=[
            "🎙️ Record & Transcribe",
            "📂 Upload & Transcribe",
            "🎤 Live Transcribe",
        ],
    )


# Start the web server as soon as the module is executed.
app.launch(debug=True)