Spaces:

adnaniqbal001
/

Translation_app

Build error

App Files Files Community

Translation_app / app.py

adnaniqbal001

Update app.py

054bba0 verified 7 months ago

raw

history blame contribute delete

2.99 kB

	import os
	import tempfile

	import numpy as np
	import soundfile as sf
	import streamlit as st
	import torch
	from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, MarianMTModel, MarianTokenizer, Wav2Vec2CTCTokenizer

	# Load models and tokenizers
	@st.cache_resource
	def load_models():
	    """Load the speech-recognition and translation models used by the app.

	    Returns:
	        A 4-tuple ``(asr_processor, asr_model, translation_tokenizer,
	        translation_model)``. If anything raises while loading, the error is
	        reported with ``st.error`` and ``(None, None, None, None)`` is
	        returned instead, so callers must check for ``None``.
	    """
	    asr_checkpoint = "facebook/wav2vec2-large-xlsr-53"
	    translation_checkpoint = "Helsinki-NLP/opus-mt-ur-de"

	    try:
	        # Imported here (inside the try) because the module-level transformers
	        # import does not bring in the feature-extractor class; an ImportError
	        # is then reported through the same st.error path as any other failure.
	        from transformers import Wav2Vec2FeatureExtractor

	        # Load Wav2Vec2 for ASR (multilingual model for Urdu support).
	        # NOTE(review): this looks like the pretrained-only XLSR-53 checkpoint;
	        # confirm it ships a CTC vocabulary / fine-tuned head, otherwise a
	        # fine-tuned Urdu checkpoint is needed here.
	        tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(asr_checkpoint)

	        # The processor needs its own feature extractor. The previous code read
	        # `asr_processor.feature_extractor` while `asr_processor` was still
	        # unassigned, which raised UnboundLocalError on every call.
	        feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(asr_checkpoint)
	        asr_processor = Wav2Vec2Processor(
	            feature_extractor=feature_extractor,
	            tokenizer=tokenizer,
	        )
	        asr_model = Wav2Vec2ForCTC.from_pretrained(asr_checkpoint)

	        # Load MarianMT for translation (Urdu to German)
	        translation_tokenizer = MarianTokenizer.from_pretrained(translation_checkpoint)
	        translation_model = MarianMTModel.from_pretrained(translation_checkpoint)

	        return asr_processor, asr_model, translation_tokenizer, translation_model

	    except Exception as e:
	        # NOTE(review): the failure tuple is a normal return value, so
	        # st.cache_resource presumably caches it too -- confirm whether a
	        # retry on the next rerun is wanted.
	        st.error(f"Error loading models: {e}")
	        return None, None, None, None


	# Initialize models; each entry is None when load_models() reported an error
	(
	    asr_processor,
	    asr_model,
	    translation_tokenizer,
	    translation_model,
	) = load_models()

	# Streamlit app interface: page title followed by a short usage hint
	st.title("Real-Time Urdu to German Voice Translator")
	st.markdown(
	    "Upload an Urdu audio file in `.wav` format, and the app will transcribe and translate it."
	)

	# File uploader, limited to .wav files through the `type` filter
	uploaded_file = st.file_uploader(
	    "Upload your Urdu audio file (16kHz .wav)",
	    type=["wav"],
	)

	# Transcribe the uploaded Urdu audio and translate the transcript to German.
	if uploaded_file is not None:
	    # Tracked outside the try so the finally clause can always clean up.
	    temp_file_path = None

	    try:
	        # load_models() returns None placeholders on failure; fail with a
	        # readable message instead of "'NoneType' object is not callable".
	        loaded = (asr_processor, asr_model, translation_tokenizer, translation_model)
	        if any(component is None for component in loaded):
	            raise RuntimeError("models are not loaded, so the audio cannot be processed")

	        # Persist the upload to disk so soundfile can open it by path.
	        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
	            temp_file_path = temp_file.name
	            temp_file.write(uploaded_file.read())

	        # Load and validate audio file
	        audio_input, sample_rate = sf.read(temp_file_path)
	        if sample_rate != 16000:
	            st.error("Audio file must have a sampling rate of 16kHz.")
	        else:
	            st.info("Processing the audio...")

	            # soundfile returns a (frames, channels) array for multi-channel
	            # files; average the channels so the ASR processor receives a
	            # single 1-D waveform.
	            if audio_input.ndim > 1:
	                audio_input = audio_input.mean(axis=1)

	            # Step 1: Speech-to-Text (ASR)
	            input_values = asr_processor(
	                audio_input, return_tensors="pt", sampling_rate=16000
	            ).input_values
	            with torch.no_grad():
	                logits = asr_model(input_values).logits
	            # Greedy CTC decoding: most likely token id at every frame.
	            predicted_ids = torch.argmax(logits, dim=-1)
	            transcription = asr_processor.batch_decode(predicted_ids)[0]

	            st.text(f"Transcribed Urdu Text: {transcription}")

	            # Step 2: Translate Text (Urdu to German)
	            translation_inputs = translation_tokenizer(
	                transcription, return_tensors="pt", padding=True
	            )
	            translated = translation_model.generate(**translation_inputs)
	            german_translation = translation_tokenizer.decode(
	                translated[0], skip_special_tokens=True
	            )

	            st.success(f"Translated German Text: {german_translation}")

	    except Exception as e:
	        st.error(f"An error occurred: {e}")

	    finally:
	        # The temp file is created with delete=False, so it must be removed
	        # explicitly or every upload leaves a file behind.
	        if temp_file_path is not None and os.path.exists(temp_file_path):
	            os.remove(temp_file_path)