import os
import sys

import torch
from dotenv import find_dotenv, load_dotenv

found_dotenv = find_dotenv(".env")

if len(found_dotenv) == 0:
    found_dotenv = find_dotenv(".env.example")
print(f"loading env vars from: {found_dotenv}")
load_dotenv(found_dotenv, override=False)

# run from the directory containing the .env file so relative paths resolve
working_dir = os.path.dirname(found_dotenv)
os.chdir(working_dir)
print("working dir:", working_dir)
print(f"adding {working_dir} to sys.path")
sys.path.append(working_dir)

from llm_toolkit.llm_utils import *
from llm_toolkit.translation_utils import *
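# the wildcard imports above are expected to provide the helpers used below
# (check_gpu, load_model, load_translation_dataset, print_row_details,
# eval_model, save_results, calc_metrics)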


def evaluate_model_all_epochs(
    model,
    tokenizer,
    model_name,
    adapter_path_base,
    dataset,
    results_path,
    start_epoch=0,
    end_epoch=-1,
    batch_size=1,
    max_new_tokens=300,
    device="cuda",
):
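    """Evaluate the base model and/or a series of adapter checkpoints.

    Epoch 0 runs the plain base model; epoch i >= 1 loads the i-th
    checkpoint directory found under adapter_path_base. Predictions for
    each epoch are saved to results_path, and metrics are computed
    against dataset["english"]. end_epoch=-1 means "up to the last
    checkpoint".
    """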
    if adapter_path_base is None:
        # without adapters there is only the base model to evaluate,
        # so clamp the range to the single epoch 0
        num_train_epochs = 0
        start_epoch = 0
        end_epoch = 0
        print(f"No adapter path provided. Running with base model: {model_name}")
    else:
        # find subdirectories in adapter_path_base
        # and sort them by epoch number
        subdirs = [
            d
            for d in os.listdir(adapter_path_base)
            if os.path.isdir(os.path.join(adapter_path_base, d))
        ]

        # assumes HF-Trainer-style checkpoint names ("checkpoint-<step>");
        # sort numerically by the trailing number
        subdirs = sorted(subdirs, key=lambda x: int(x.split("-")[-1]))
        num_train_epochs = len(subdirs)
        print(f"found {num_train_epochs} checkpoints: {subdirs}")

        if end_epoch < 0 or end_epoch > num_train_epochs:
            end_epoch = num_train_epochs

        print(f"Running from epoch {start_epoch} to {end_epoch}")

    for i in range(start_epoch, end_epoch + 1):
        print(f"Epoch {i}")
        if i > 0:
            adapter_name = subdirs[i - 1]
            adapter_path = os.path.join(adapter_path_base, adapter_name)
            print(f"loading adapter: {adapter_path}")
            model.load_adapter(adapter_path, adapter_name=adapter_name)
            # set_adapter() activates the freshly loaded adapter; assigning
            # to model.active_adapters only shadows the accessor and does
            # not switch adapters
            model.set_adapter(adapter_name)

        predictions = eval_model(
            model,
            tokenizer,
            dataset,
            device=device,
            batch_size=batch_size,
            max_new_tokens=max_new_tokens,
        )

        model_name_with_epochs = f"{model_name}/epochs-{i:02d}"

        save_results(
            model_name_with_epochs,
            results_path,
            dataset,
            predictions,
        )

        metrics = calc_metrics(dataset["english"], predictions, debug=True)
        print(f"{model_name_with_epochs} metrics: {metrics}")


if __name__ == "__main__":
    model_name = os.getenv("MODEL_NAME")
    adapter_path_base = os.getenv("ADAPTER_PATH_BASE")
    start_epoch = int(os.getenv("START_EPOCH", 1))
    end_epoch = int(os.getenv("END_EPOCH", -1))
    load_in_4bit = os.getenv("LOAD_IN_4BIT", "true").lower() == "true"
    results_path = os.getenv("RESULTS_PATH", None)
    data_path = os.getenv("DATA_PATH")

    print(
        model_name,
        adapter_path_base,
        load_in_4bit,
        start_epoch,
        results_path,
    )

    device = check_gpu()
    is_cuda = torch.cuda.is_available()

    print(f"Evaluating model: {model_name} on {device}")

    if is_cuda:
        torch.cuda.empty_cache()
        print_gpu_utilization(0)

    model, tokenizer = load_model(model_name, load_in_4bit=load_in_4bit)

    datasets = load_translation_dataset(data_path, tokenizer, num_shots=0)
    print_row_details(datasets["test"].to_pandas())

    if is_cuda:
        print_gpu_utilization(1)

    evaluate_model_all_epochs(
        model,
        tokenizer,
        model_name,
        adapter_path_base,
        datasets["test"],
        results_path,
        start_epoch=start_epoch,
        end_epoch=end_epoch,
        device=device,
    )

    if is_cuda:
        print_gpu_utilization(2)