from transformers import MarianMTModel, MarianTokenizer
from tqdm import tqdm
import os
import re
import argparse

# Load Model and Tokenizer
# These run at import time; `tokenizer` and `model` are module-level globals
# that translate_text() uses for every line it translates.
# NOTE(review): the "en-es" checkpoint name suggests English -> Spanish, and
# from_pretrained() presumably downloads/caches the weights on first use —
# confirm against the transformers documentation.
model_name = "Helsinki-NLP/opus-mt-en-es"
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)

# Extract & separate timestamp and text
# Matches a leading "[<start>-<end>]" marker (both values decimal numbers)
# followed by optional whitespace and the rest of the line. Whitespace is
# optional so that a bare "[0.00-1.50]" line is still recognised as a
# timestamp instead of being treated as text to translate.
_TIMESTAMP_LINE_RE = re.compile(r'\[(\d+\.\d+-\d+\.\d+)\]\s*(.*)')


def extract_timestamp_and_text(line):
    """Split a transcript line into its timestamp and its text.

    Args:
        line: A single line, expected to look like ``[0.00-1.50] some text``.

    Returns:
        A ``(timestamp, text)`` tuple. ``timestamp`` is the content between
        the brackets (without the brackets) and ``text`` is whatever follows
        it with leading whitespace removed. When the line does not start with
        a timestamp marker, ``timestamp`` is ``''`` and ``text`` is the line
        unchanged.
    """
    match = _TIMESTAMP_LINE_RE.match(line)
    if match is None:
        return '', line
    return match.group(1), match.group(2)

# Translate text
def translate_text(text):
    """Translate a multi-line transcript line by line, keeping timestamps.

    Each non-blank line is split with extract_timestamp_and_text(); the text
    part is run through the module-level ``tokenizer`` and ``model`` and the
    timestamp, if any, is re-attached in front of the translation.

    Args:
        text: The full input, with lines separated by ``'\\n'``.

    Returns:
        The translated lines joined with ``'\\n'``. Blank lines are kept as
        empty lines, lines with a timestamp come back as
        ``[timestamp] translation`` (or just ``[timestamp]`` when there was
        no text), and lines without a timestamp come back as the bare
        translation.
    """
    translated_lines = []

    for line in tqdm(text.split('\n'), desc="Translating lines", leave=False):
        # Keep blank lines so the output has the same line layout as the input.
        if not line.strip():
            translated_lines.append('')
            continue

        timestamp, line_text = extract_timestamp_and_text(line)

        # Only re-attach a timestamp that was actually parsed; an empty
        # timestamp means the line had none, and emitting "[]" in front of
        # the translation would corrupt the output.
        parts = [f'[{timestamp}]'] if timestamp else []

        if line_text.strip():
            # truncation=True lets the tokenizer cut over-long input, so a
            # very long line may only be partially translated.
            model_inputs = tokenizer(line_text, return_tensors="pt", truncation=True, padding="longest")
            translated = model.generate(**model_inputs)
            # A single input line yields a single decoded sequence.
            parts.append(tokenizer.batch_decode(translated, skip_special_tokens=True)[0])

        translated_lines.append(' '.join(parts))

    return '\n'.join(translated_lines)

# Main function to translate a file
def translate_file(src_file_path, dst_file_path):
    """Read a text file, translate it with translate_text(), and save the result.

    Both files are handled as UTF-8 so that accented Spanish characters are
    written correctly regardless of the platform's default encoding.

    Args:
        src_file_path: Path of the file containing the text to translate.
        dst_file_path: Path the translated text is written to (overwritten
            if it already exists).

    Returns:
        None. On success a confirmation is printed; any exception raised
        while reading, translating or writing is caught and reported with a
        printed message instead of being propagated.
    """
    try:
        with open(src_file_path, 'r', encoding='utf-8') as file:
            english_text = file.read()

        # Translate after the source file is closed; there is no need to
        # hold the handle open for the duration of the model inference.
        spanish_text = translate_text(english_text)

        with open(dst_file_path, 'w', encoding='utf-8') as file:
            file.write(spanish_text)
        print(f"Translation completed: {dst_file_path}")

    except Exception as e:
        # Top-level boundary for the script: report the failure to the user
        # rather than showing a traceback.
        print(f"Error processing file: {e}")

if __name__ == "__main__":
    # Command-line entry point: two positional paths, source then destination.
    cli = argparse.ArgumentParser(description="Translate English text to Spanish")

    # Positional arguments declared as (name, help text) pairs, in CLI order.
    positional_args = (
        ("src_file_path", "Path to the source file with English text"),
        ("dst_file_path", "Path to save the translated Spanish text"),
    )
    for arg_name, arg_help in positional_args:
        cli.add_argument(arg_name, help=arg_help)

    options = cli.parse_args()
    translate_file(options.src_file_path, options.dst_file_path)