Spaces:

yasserrmd
/

revai_reverb_asr

Running

revai_reverb_asr / app.py

Update app.py

daff1a1 verified 1 day ago

1.66 kB

	import gradio as gr
	import torch
	from wenet.cli.model import load_model
	from huggingface_hub import hf_hub_download
	#import spaces

	# Hugging Face Hub repository the model artifacts are fetched from.
	REPO_ID = "Revai/reverb-asr"
	# Artifacts to fetch, listed in the positional order load_model() receives
	# them below.
	files = ['reverb_asr_v1.jit.zip', 'tk.units.txt']
	downloaded_files = [
	    hf_hub_download(repo_id=REPO_ID, filename=name) for name in files
	]
	# Loaded once at import time; transcribe_audio() reuses this module-level
	# model for every call.
	model = load_model(*downloaded_files)



	def process_cat_embs(style):
	    """Parse a comma-separated string of numbers into a 1-D CPU tensor.

	    Args:
	        style: String such as ``"0.3,0.7"``. Each comma-separated field is
	            converted with ``float()``, so surrounding whitespace in a
	            field is tolerated.

	    Returns:
	        A ``torch.Tensor`` on the CPU holding one element per field, in
	        the order given.

	    Raises:
	        ValueError: If any field is empty or is not a valid number.
	    """
	    device = torch.device("cpu")
	    try:
	        values = [float(field) for field in style.split(',')]
	    except ValueError as err:
	        # Same exception type float() raises, but naming the whole input
	        # makes the failure easier to trace back to the caller.
	        raise ValueError(
	            f"style must be comma-separated numbers, got {style!r}"
	        ) from err
	    # Allocate directly on the target device instead of creating the
	    # tensor on the default device and moving it afterwards.
	    return torch.tensor(values, device=device)


	#@spaces.GPU
	def transcribe_audio(audio, style=0):
	    """Transcribe an audio input and return the text to display.

	    Args:
	        audio: Value forwarded as the first argument to
	            ``model.transcribe``. Any falsy value (``None``, ``""``) is
	            treated as "no audio supplied".
	        style: Number used as the first style weight; ``1 - style`` is
	            used as the second. Defaults to 0.

	    Returns:
	        The transcription text, or a human-readable error message when no
	        audio was given or the model returned no ``'text'`` entry.
	    """
	    if not audio:
	        return "Input Error! Please enter one audio!"

	    # process_cat_embs() takes a comma-separated string, so the two
	    # weights are serialized as "<style>,<1 - style>".
	    cat_embs = process_cat_embs(f'{style},{1-style}')
	    result = model.transcribe(audio, cat_embs=cat_embs)

	    if not result or 'text' not in result:
	        return "ERROR! No text output! Please try again!"

	    # U+2581 is presumably the tokenizer's word-boundary marker (verify
	    # against the model's unit table). Turn it into spaces, then trim the
	    # padding that substitution leaves at the ends of the text.
	    return result['text'].replace('▁', ' ').strip()



	# Input widgets: an audio source handed to the callback as a file path,
	# and a 0-to-1 slider (step 0.1) supplying the style value.
	audio_input = gr.Audio(label="Upload or Record Audio", type="filepath")
	style_slider = gr.Slider(
	    minimum=0,
	    maximum=1,
	    value=0,
	    step=0.1,
	    label="Transcription Style",
	    info="Adjust the transcription style: 0 (casual) to 1 (formal).",
	)
	# Output widget that receives the string returned by transcribe_audio().
	output_textbox = gr.Textbox(label="Transcription Output")

	# Blurb passed to the interface as its description.
	description = (
	    "This tool transcribes audio using a customizable transcription style "
	    "ranging from casual to formal. Upload or record an audio file to begin."
	)

	# Wire the widgets to the transcription callback.
	iface = gr.Interface(
	    fn=transcribe_audio,
	    inputs=[audio_input, style_slider],
	    outputs=output_textbox,
	    title="Audio Transcription",
	    description=description,
	    theme="default",
	)


	# Start the web app.
	iface.launch()