Spaces:

srkvatsa
/

Lecture-Transcription

Sleeping

Srivatsa Kundurthy

update app

b6e138e 6 months ago

3.03 kB

	import gradio as gr
	from transformers import pipeline
	import torch
	import numpy as np
	# Run on the first CUDA device when torch reports one; otherwise use the CPU.
	if torch.cuda.is_available():
	    device = "cuda:0"
	else:
	    device = "cpu"

	# Speech-recognition pipeline shared by every tab of the app.
	# NOTE(review): trust_remote_code=True lets code shipped in the model
	# repository run locally -- confirm this checkpoint actually requires it.
	wav2_ft = pipeline(
	    "automatic-speech-recognition",
	    model="sanchit-gandhi/wav2vec2-large-tedlium",
	    device=device,
	    trust_remote_code=True,
	)


	# Top-level Blocks container; it is populated and launched at the end of the file.
	app = gr.Blocks()

	def inference(path):
	    """Transcribe a recorded or uploaded audio file.

	    Args:
	        path: Location of the audio file supplied by the audio input
	            component, or a falsy value (``None`` / empty string) when the
	            form was submitted without any audio.

	    Returns:
	        The transcription text, or an empty string when no audio was given.
	    """
	    # An empty submission carries no file path; return early instead of
	    # handing None to the pipeline, which would fail with an opaque error.
	    if not path:
	        return ""

	    out = wav2_ft(
	        path,
	        max_new_tokens=256,
	        chunk_length_s=30,  # long recordings are processed in 30-second chunks
	        batch_size=8,
	    )
	    return out["text"]


	def transcribe(stream, new_chunk):
	    """Append one microphone chunk to the running stream and transcribe it all.

	    Args:
	        stream: Audio accumulated by previous calls, or ``None`` on the
	            first call.
	        new_chunk: ``(sampling_rate, samples)`` pair for the newest chunk;
	            ``samples`` is a NumPy array.

	    Returns:
	        A ``(stream, text)`` tuple: the updated accumulated audio and the
	        transcription of that whole accumulated audio.
	    """
	    sr, y = new_chunk
	    y = y.astype(np.float32)

	    # Down-mix multi-channel audio (assumed laid out as (samples, channels))
	    # so the accumulated stream stays one-dimensional.
	    if y.ndim > 1:
	        y = y.mean(axis=1)

	    # Peak-normalise the chunk. A silent chunk has a peak of zero and an
	    # empty one has no peak at all; dividing in either case would fail or
	    # fill the stream with NaN, so such chunks are left untouched.
	    peak = np.max(np.abs(y)) if y.size else 0.0
	    if peak > 0:
	        y /= peak

	    stream = y if stream is None else np.concatenate([stream, y])
	    return stream, wav2_ft({"sampling_rate": sr, "raw": stream})["text"]

	# Tab 1: record a clip with the microphone, then transcribe it on submit.
	mic_mode = gr.Interface(
	    fn=inference,
	    title="🎙️ Recording & Transcribe",
	    description="Record through your mic. When you're done, hit stop and wait a moment. Feel free to trim the recording. Then, hit Submit!",
	    inputs=gr.Audio(
	        sources="microphone",
	        type="filepath",  # inference() receives a path to the recorded file
	        label="Record Your Lecture",
	    ),
	    outputs=gr.Textbox(label="Transcription Output"),
	    examples=[],
	)


	# Tab 2: upload an existing recording and transcribe it on submit.
	upload_mode = gr.Interface(
	    fn=inference,
	    title="📂 Upload & Transcribe",
	    description="Have a recorded lecture? Upload the audio file here, and it'll be transcribed in seconds!",
	    inputs=gr.Audio(
	        sources="upload",
	        type="filepath",  # inference() receives a path to the uploaded file
	        label="Upload Your Lecture Recording",
	    ),
	    outputs=gr.Textbox(label="Transcription Output"),
	)

	# Tab 3: streaming transcription, modelled on Gradio's real-time speech
	# recognition guide: https://www.gradio.app/guides/real-time-speech-recognition
	# The "state" input/output pair carries the audio accumulated so far between
	# successive calls to transcribe().
	live_mode = gr.Interface(
	    fn=transcribe,
	    inputs=["state", gr.Audio(sources=["microphone"], streaming=True)],
	    outputs=["state", "text"],
	    title="🎤 Live Transcription",
	    description="Transcribe your lecture in real-time! Start speaking into your microphone, and watch the transcription appear instantly.",
	    live=True,
	)



	# Assemble the page inside the Blocks container: introductory Markdown on
	# top, then the three interfaces presented as tabs.
	with app:
	    gr.Markdown(
	        value="""
	# Lecture Transcription 📝

	Welcome to Lecture Transcription, the go-to tool for transcribing lectures accurately. Whether you’re attending a live lecture or revisiting a recorded one, this app will ensure you don’t miss a single detail.

	## How It Works
	- Recording Mode: Record the lecture as it happens. When you stop, your transcription will be generated.
	- Upload Mode: Upload your pre-recorded lecture audio files, and receive a precise transcription. Supports various audio formats including WAV, MP3, and more.
	- Live Mode: That's right, low-latency live transcription!

	## Optimized for Technical Oration
	Under the hood, this is a Wav2Vec2 model fine-tuned on the TED-Lium dataset. It's well-versed for
	accurately transcribing technical speech.


	"""
	    )
	    gr.TabbedInterface(
	        interface_list=[mic_mode, upload_mode, live_mode],
	        tab_names=[
	            "🎙️ Record & Transcribe",
	            "📂 Upload & Transcribe",
	            "🎤 Live Transcribe",
	        ],
	    )


	# Start the web server; debug=True is passed through to Gradio unchanged.
	app.launch(debug=True)