|
import gradio as gr
import numpy as np
import torch
from scipy.io import wavfile
from scipy.signal import resample_poly
from transformers import AutoProcessor, SeamlessM4TModel, SeamlessM4Tv2Model
|
|
|
class SeamlessM4TApp:
    """Gradio backend wrapping SeamlessM4T-v2 for speech-to-text transcription."""

    # Checkpoint and the sample rate the SeamlessM4T processor expects.
    MODEL_NAME = "facebook/seamless-m4t-v2-large"
    TARGET_SR = 16000

    def __init__(self):
        # Prefer GPU when available; the v2-large checkpoint is slow on CPU.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")

        self.processor = AutoProcessor.from_pretrained(self.MODEL_NAME)
        # BUG FIX: the v2 checkpoint must be loaded with SeamlessM4Tv2Model;
        # SeamlessM4TModel only implements the v1 architecture.
        self.model = SeamlessM4Tv2Model.from_pretrained(self.MODEL_NAME)
        self.model.to(self.device)

    def _load_audio(self, audio_path):
        """Read a WAV file, downmix to mono float32 in [-1, 1], resample to 16 kHz.

        NOTE(review): assumes Gradio hands us a WAV file (its default for
        microphone recordings); other containers would need e.g. torchaudio.
        """
        sample_rate, waveform = wavfile.read(audio_path)
        if waveform.dtype.kind == "i":
            # Integer PCM -> normalized float expected by the processor.
            waveform = waveform.astype(np.float32) / np.iinfo(waveform.dtype).max
        else:
            waveform = waveform.astype(np.float32)
        if waveform.ndim > 1:
            # Stereo (samples, channels) -> mono by channel average.
            waveform = waveform.mean(axis=1)
        if sample_rate != self.TARGET_SR:
            waveform = resample_poly(waveform, self.TARGET_SR, sample_rate)
        return waveform

    def transcribe_audio(self, audio_path):
        """Transcribe the audio file at *audio_path* to English text.

        Args:
            audio_path: Path to a WAV file (as provided by gr.Audio(type="filepath")).

        Returns:
            The transcription string, or an "Error during transcription: ..."
            message on failure (never raises, so the UI stays up).
        """
        try:
            # BUG FIX: the processor expects waveform arrays, not file paths,
            # so load and resample the file first.
            waveform = self._load_audio(audio_path)
            audio_inputs = self.processor(
                audios=waveform,
                return_tensors="pt",
                sampling_rate=self.TARGET_SR,
            ).to(self.device)

            with torch.no_grad():
                # BUG FIX: generate() has no `task` kwarg; speech-to-text is
                # selected by disabling speech synthesis.
                output_tokens = self.model.generate(
                    **audio_inputs,
                    tgt_lang="eng",
                    generate_speech=False,
                )

            # With generate_speech=False the first element holds the text token
            # ids with a batch axis (batch, seq); decode the single batch item.
            return self.processor.decode(
                output_tokens[0].tolist()[0],
                skip_special_tokens=True,
            )

        except Exception as e:
            # Surface the failure in the UI rather than crashing the app.
            return f"Error during transcription: {str(e)}"
|
|
|
|
|
def create_interface():
    """Build the Gradio speech-to-text UI around SeamlessM4TApp.

    Returns:
        gr.Interface: configured interface, ready for .launch().
    """
    app = SeamlessM4TApp()

    interface = gr.Interface(
        fn=app.transcribe_audio,
        inputs=gr.Audio(
            type="filepath",
            label="Upload Audio",
            # BUG FIX: Gradio 4.x renamed `source` to `sources` (a list); also
            # allow uploads so the widget matches the description below, which
            # promises both upload and microphone input.
            sources=["upload", "microphone"],
        ),
        outputs=gr.Textbox(label="Transcription"),
        title="SeamlessM4T Speech-to-Text",
        description="Upload audio or use microphone to transcribe speech to text using SeamlessM4T model.",
        examples=[],
        cache_examples=False,
    )

    return interface
|
|
|
if __name__ == "__main__":
    # Build the UI and start the local Gradio server (blocks until closed).
    create_interface().launch()