FOREIGN-WHISPERS

Sleeping

App Files Files Community

FOREIGN-WHISPERS / opus.py

RobCaamano

Update opus.py

c80f760 10 months ago

raw

history blame contribute delete

No virus

2.24 kB

	from transformers import MarianMTModel, MarianTokenizer
	from tqdm import tqdm
	import os
	import re
	import argparse

	# Load Model and Tokenizer
	# These run at module level, i.e. once when the file is executed or imported.
	# translate_text() reads `tokenizer` and `model` as module globals, so every
	# translated line reuses the same loaded objects.
	# The checkpoint name (and the CLI description below) indicate English -> Spanish.
	# NOTE(review): from_pretrained presumably fetches the checkpoint on first use
	# if it is not cached locally -- confirm for offline deployments.
	model_name = "Helsinki-NLP/opus-mt-en-es"
	tokenizer = MarianTokenizer.from_pretrained(model_name)
	model = MarianMTModel.from_pretrained(model_name)

	# Extract & separate timestamp and text
	# Pattern: a leading "[<number>.<number>-<number>.<number>]" followed by the
	# rest of the line. Whitespace after the bracket is optional so that a bare
	# "[0.00-1.50]" (no trailing space, no text) is still recognised as a timestamp.
	_TIMESTAMP_LINE_RE = re.compile(r'\[(\d+\.\d+-\d+\.\d+)\]\s*(.*)')


	def extract_timestamp_and_text(line):
	    """Split a transcript line into its timestamp and its text.

	    Args:
	        line: A single line, expected to look like ``[0.00-1.50] some text``.
	            The bracketed timestamp must be at the very start of the line.

	    Returns:
	        A ``(timestamp, text)`` tuple. ``timestamp`` is the content between the
	        brackets (e.g. ``"0.00-1.50"``) and ``text`` is whatever follows it,
	        with the separating whitespace removed. If the line does not start
	        with a timestamp, returns ``('', line)`` with the line unchanged.
	    """
	    match = _TIMESTAMP_LINE_RE.match(line)
	    if match is None:
	        return '', line
	    return match.group(1), match.group(2)

	# Translate text
	def translate_text(text):
	    """Translate a multi-line string line by line, keeping timestamps in place.

	    Each line is split into an optional leading ``[start-end]`` timestamp and
	    its text via ``extract_timestamp_and_text``. Only the text is sent through
	    the module-level ``tokenizer`` / ``model``; the timestamp is copied through
	    unchanged.

	    Args:
	        text: The full input, with lines separated by ``'\\n'``.

	    Returns:
	        The translated lines joined with ``'\\n'``, one output line per input
	        line:
	          * blank / whitespace-only lines become empty strings;
	          * ``[ts] text`` becomes ``[ts] <translation>``;
	          * ``[ts]`` with no text stays ``[ts]``;
	          * a line without a timestamp becomes just ``<translation>`` (no
	            empty ``[]`` prefix is added).
	    """
	    translated_lines = []

	    for line in tqdm(text.split('\n'), desc="Translating lines", leave=False):
	        # Preserve blank lines so the output keeps the input's line layout.
	        if not line.strip():
	            translated_lines.append('')
	            continue

	        timestamp, line_text = extract_timestamp_and_text(line)

	        # Emit the bracket prefix only when a timestamp was actually parsed;
	        # otherwise an untimestamped line would come out as "[] <translation>".
	        parts = [f'[{timestamp}]'] if timestamp else []

	        if line_text.strip():
	            # NOTE(review): truncation=True presumably clips very long lines to
	            # the model's maximum input length instead of raising -- confirm.
	            model_inputs = tokenizer(line_text, return_tensors="pt", truncation=True, padding="longest")
	            translated = model.generate(**model_inputs)
	            # A single input line was passed in, so take the first decoded output.
	            parts.append(tokenizer.batch_decode(translated, skip_special_tokens=True)[0])

	        translated_lines.append(' '.join(parts))

	    return '\n'.join(translated_lines)

	# Main function to translate a file
	def translate_file(src_file_path, dst_file_path):
	    """Translate the contents of one text file and write the result to another.

	    Reads ``src_file_path``, passes its contents to ``translate_text``, and
	    writes the returned string to ``dst_file_path`` (overwriting it). Both
	    files are handled as UTF-8 so accented characters are read and written
	    the same way regardless of the platform's default encoding.

	    Args:
	        src_file_path: Path of the file to read.
	        dst_file_path: Path of the file to write the translation to.

	    Returns:
	        None. On success a "Translation completed" message is printed. Any
	        exception raised while reading, translating or writing is caught and
	        reported with an "Error processing file" message instead of being
	        propagated, so callers cannot detect failure from the return value.
	    """
	    try:
	        # Explicit encoding: the platform default may not be UTF-8 and could
	        # fail on, or corrupt, non-ASCII characters.
	        with open(src_file_path, 'r', encoding='utf-8') as file:
	            english_text = file.read()

	        # The source file is already closed here; translation can be slow.
	        spanish_text = translate_text(english_text)

	        with open(dst_file_path, 'w', encoding='utf-8') as file:
	            file.write(spanish_text)
	        print(f"Translation completed: {dst_file_path}")

	    except Exception as e:
	        # Deliberate best-effort boundary: report the failure and return.
	        print(f"Error processing file: {e}")

	if __name__ == "__main__":
	    # Command-line entry point: two required positional paths, source first,
	    # destination second, handed straight to translate_file().
	    cli = argparse.ArgumentParser(description="Translate English text to Spanish")
	    for arg_name, arg_help in (
	        ("src_file_path", "Path to the source file with English text"),
	        ("dst_file_path", "Path to save the translated Spanish text"),
	    ):
	        cli.add_argument(arg_name, help=arg_help)
	    options = cli.parse_args()

	    translate_file(options.src_file_path, options.dst_file_path)