Spaces:

Detomo
/

audio-translate

Runtime error

App Files Files Community

audio-translate / app.py

vumichien

Update app.py

1f6bb18 almost 2 years ago

raw history blame

No virus

1.99 kB

	import gradio as gr
	import librosa
	from optimum.onnxruntime import ORTModelForSeq2SeqLM
	from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
	import torch

	# Hub checkpoint ids, named once so each processor/model pair is guaranteed
	# to be loaded from the same repository.
	ASR_CHECKPOINT = "jonatasgrosman/wav2vec2-large-xlsr-53-english"
	MT_CHECKPOINT = "icon-it-tdtu/mt-en-vi-optimum"

	# Speech recognition: input processor plus CTC acoustic model.
	processor = Wav2Vec2Processor.from_pretrained(ASR_CHECKPOINT)
	model = Wav2Vec2ForCTC.from_pretrained(ASR_CHECKPOINT)

	# Translation: tokenizer plus the ONNX Runtime seq2seq model.
	tokenizer = AutoTokenizer.from_pretrained(MT_CHECKPOINT)
	model_lm = ORTModelForSeq2SeqLM.from_pretrained(MT_CHECKPOINT)


	def process_audio_file(file, sampling_rate=16000):
	    """Load an audio file and convert it into inputs for the speech model.

	    Args:
	        file: Path of the audio file to read (passed to ``librosa.load``).
	        sampling_rate: Rate, in Hz, the audio is resampled to and that is
	            reported to the processor. Defaults to 16000, the value this
	            script has always used.

	    Returns:
	        The object returned by ``processor(...)`` with PyTorch tensors
	        (``return_tensors="pt"``) and padding enabled.
	    """
	    # Let librosa resample while loading. Calling ``librosa.load`` without
	    # ``sr`` first resamples to librosa's own default rate, and the follow-up
	    # ``librosa.resample(data, sr, 16000)`` call used positional sample rates,
	    # which newer librosa releases reject (they are keyword-only there).
	    data, _ = librosa.load(file, sr=sampling_rate)
	    print(data.shape)
	    inputs = processor(
	        data,
	        sampling_rate=sampling_rate,
	        return_tensors="pt",
	        padding=True,
	    )
	    return inputs


	def interpret(file, uploaded_file=None):
	    """Transcribe spoken audio and return its translation.

	    The Gradio interface in this file declares two audio inputs (microphone
	    and upload), so this callback is invoked with two values. The first
	    non-empty path is used.

	    Args:
	        file: Path of the first audio input, or ``None`` if it was not given.
	        uploaded_file: Path of the second audio input, or ``None``. Optional
	            so existing single-argument callers keep working.

	    Returns:
	        The string returned by ``translate`` for the lower-cased transcription.

	    Raises:
	        ValueError: If neither argument holds an audio path.
	    """
	    audio_path = file or uploaded_file
	    if not audio_path:
	        raise ValueError(
	            "No audio provided: record from the microphone or upload a file."
	        )

	    inputs = process_audio_file(audio_path)
	    # Inference only: skip gradient tracking.
	    with torch.no_grad():
	        output_logit = model(inputs.input_values).logits
	    # Pick the highest-scoring token id per frame, then decode the ids to text.
	    pred_ids = torch.argmax(output_logit, dim=-1)
	    text = processor.batch_decode(pred_ids)[0].lower()
	    print(text)
	    return translate(text)


	def translate(text):
	    """Translate a piece of text with the seq2seq translation model.

	    Args:
	        text: Source text to translate.

	    Returns:
	        The first decoded sequence produced by ``model_lm.generate`` with
	        special tokens removed, or ``""`` when ``text`` is empty or only
	        whitespace.
	    """
	    # An empty transcription (e.g. silent audio) has nothing to translate;
	    # skip generation rather than let the model invent output.
	    if not text or not text.strip():
	        return ""

	    batch = tokenizer([text], return_tensors="pt")
	    generated_ids = model_lm.generate(**batch)
	    # A single-item batch went in, so the first decoded entry is the result.
	    return tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

	# Build the web UI: two audio inputs (microphone recording and file upload),
	# both delivered to the callback as file paths, and one text output.
	# NOTE(review): `source=`, `optional=`, `gr.outputs.Textbox` and
	# `enable_queue=` look like the older Gradio API - confirm the pinned Gradio
	# version still accepts them.
	microphone_input = gr.Audio(source="microphone", type="filepath", streaming=False)
	upload_input = gr.Audio(source="upload", type="filepath", optional=True)
	result_box = gr.outputs.Textbox(label="Interpreted text")

	iface = gr.Interface(
	    fn=interpret,
	    title="Interpret English to Vietnamese",
	    description="A simple interface to interpret from spoken English to Vietnamese.",
	    article="Author: <a href=\"https://huggingface.co/vumichien\">Vu Minh Chien</a>.",
	    inputs=[microphone_input, upload_input],
	    outputs=result_box,
	)

	# Start the app with request queueing and debug mode enabled.
	iface.launch(enable_queue=True, debug=True)