"""Config for analyzing GPT-MT."""

from __future__ import annotations

from collections.abc import Callable
from dataclasses import dataclass

from zeno_build.evaluation.text_features.capitalization import input_capital_char_ratio
from zeno_build.evaluation.text_features.exact_match import avg_exact_match, exact_match
from zeno_build.evaluation.text_features.frequency import output_max_word_freq
from zeno_build.evaluation.text_features.length import (
    doc_context_length,
    input_length,
    label_length,
    output_length,
)
from zeno_build.evaluation.text_metrics.critique import (
    avg_bert_score,
    avg_chrf,
    avg_comet,
    avg_length_ratio,
    bert_score,
    chrf,
    comet,
    length_ratio,
)
from zeno_build.experiments import search_space

from modeling import remove_leading_language

# Every language pair that appears in at least one experiment.
# Each entry is a four-letter code (presumably source + target language
# codes, e.g. "deen" for German->English -- confirm against the data layout).
_ALL_LANG_PAIRS: list[str] = [
    "csen",
    "deen",
    "defr",
    "encs",
    "ende",
    "enha",
    "enis",
    "enja",
    "enru",
    "enuk",
    "enzh",
    "frde",
    "haen",
    "isen",
    "jaen",
    "ruen",
    "uken",
    "zhen",
]

# The reduced set used by experiments that cover only some language pairs.
_LIMITED_LANG_PAIRS: list[str] = [
    "deen",
    "defr",
    "ende",
    "enru",
    "enzh",
    "frde",
    "ruen",
    "zhen",
]

# Named groups of language pairs, looked up by group name
# (e.g. the "all_lang_pairs" constant in the main search space).
lang_pairs: dict[str, list[str]] = {
    "all_lang_pairs": _ALL_LANG_PAIRS,
    "limited_lang_pairs": _LIMITED_LANG_PAIRS,
}

# Model presets compared in the main experiments. Every name here matches a
# key of `model_configs` defined later in this file.
_main_model_presets = [
    "text-davinci-003-zeroshot",
    "text-davinci-003-RR-1-shot",
    "text-davinci-003-RR-5-shot",
    "text-davinci-003-QR-1-shot",
    "text-davinci-003-QR-5-shot",
    "gpt-3.5-turbo-0301-zeroshot",
    "gpt-4-0314-zeroshot",
    "gpt-4-0314-zeroshot-postprocess",
    "MS-Translator",
    "google-cloud",
    "wmt-best",
]

# The search space for the main experiments: the language-pair group is fixed
# to "all_lang_pairs" (a key of `lang_pairs`), while the model preset ranges
# over the categorical list above.
main_space = search_space.CombinatorialSearchSpace(
    {
        "lang_pairs": search_space.Constant("all_lang_pairs"),
        "model_preset": search_space.Categorical(_main_model_presets),
    }
)


@dataclass(frozen=True)
class GptMtConfig:
    """Config for gpt-MT models.

    Frozen (immutable) description of one system listed in ``model_configs``.
    Only ``path`` and ``base_model`` are required; the remaining fields
    default to ``None``, which is how the non-GPT systems in this file
    (e.g. "MS-Translator", "google-cloud", "wmt-best") are declared.

    Attributes:
        path: Identifier such as "gpt-4-0314/zeroshot". Presumably the
            location of this system's outputs relative to a data root --
            confirm against the code that loads the results.
        base_model: Name of the underlying model or translation system,
            e.g. "text-davinci-003" or "google-cloud".
        prompt_strategy: Few-shot prompt strategy label; the values used in
            this file are "RR" and "QR". ``None`` when no strategy applies.
        prompt_shots: Number of in-context examples (0, 1 or 5 in this
            file). ``None`` when unset.
        post_processors: Optional list of ``str -> str`` callables.
            Presumably applied to each system output in list order --
            TODO confirm against the consumer.
    """

    # Required fields come first; they may be passed positionally.
    path: str
    base_model: str
    # Optional prompt settings and output clean-up hooks.
    prompt_strategy: str | None = None
    prompt_shots: int | None = None
    post_processors: list[Callable[[str], str]] | None = None


# The details of each model, keyed by the preset name used in the search space.
# Arguments are spelled out by keyword so each entry is self-describing.
model_configs = {
    # text-davinci-003 with few-shot prompts ("RR" / "QR" strategies).
    "text-davinci-003-RR-1-shot": GptMtConfig(
        path="text-davinci-003/RR/1-shot",
        base_model="text-davinci-003",
        prompt_strategy="RR",
        prompt_shots=1,
    ),
    "text-davinci-003-RR-5-shot": GptMtConfig(
        path="text-davinci-003/RR/5-shot",
        base_model="text-davinci-003",
        prompt_strategy="RR",
        prompt_shots=5,
    ),
    "text-davinci-003-QR-1-shot": GptMtConfig(
        path="text-davinci-003/QR/1-shot",
        base_model="text-davinci-003",
        prompt_strategy="QR",
        prompt_shots=1,
    ),
    "text-davinci-003-QR-5-shot": GptMtConfig(
        path="text-davinci-003/QR/5-shot",
        base_model="text-davinci-003",
        prompt_strategy="QR",
        prompt_shots=5,
    ),
    # Zero-shot GPT systems: no prompt strategy, zero in-context examples.
    "text-davinci-003-zeroshot": GptMtConfig(
        path="text-davinci-003/zeroshot",
        base_model="text-davinci-003",
        prompt_strategy=None,
        prompt_shots=0,
    ),
    "gpt-3.5-turbo-0301-zeroshot": GptMtConfig(
        path="gpt-3.5-turbo-0301/zeroshot",
        base_model="gpt-3.5-turbo-0301",
        prompt_strategy=None,
        prompt_shots=0,
    ),
    "gpt-4-0314-zeroshot": GptMtConfig(
        path="gpt-4-0314/zeroshot",
        base_model="gpt-4-0314",
        prompt_strategy=None,
        prompt_shots=0,
    ),
    # Same path as "gpt-4-0314-zeroshot", with `remove_leading_language`
    # registered as a post-processor.
    "gpt-4-0314-zeroshot-postprocess": GptMtConfig(
        path="gpt-4-0314/zeroshot",
        base_model="gpt-4-0314",
        prompt_strategy=None,
        prompt_shots=0,
        post_processors=[remove_leading_language],
    ),
    # Non-GPT systems: prompt fields are left at their `None` defaults.
    "MS-Translator": GptMtConfig(path="MS-Translator", base_model="MS-Translator"),
    "google-cloud": GptMtConfig(path="google-cloud", base_model="google-cloud"),
    "wmt-best": GptMtConfig(path="wmt-best", base_model="wmt-best"),
}

# Functions used for the experiment sweep: only chrF is configured here.
# Presumably `sweep_distill_functions` are computed for every run and
# `sweep_metric_function` is the single aggregate used to compare runs --
# confirm against the script that consumes this config.
sweep_distill_functions = [chrf]
sweep_metric_function = avg_chrf

# Text-property functions over the input, output, label and document context.
_zeno_text_feature_functions = [
    output_length,
    input_length,
    label_length,
    doc_context_length,
    input_capital_char_ratio,
    output_max_word_freq,
]

# Scoring functions without the `avg_` prefix (presumably per-example scores;
# confirm against zeno_build).
_zeno_score_functions = [
    chrf,
    comet,
    length_ratio,
    bert_score,
    exact_match,
]

# The `avg_`-prefixed counterparts of the scoring functions above, in the
# same order.
_zeno_average_functions = [
    avg_chrf,
    avg_comet,
    avg_length_ratio,
    avg_bert_score,
    avg_exact_match,
]

# The functions used for Zeno visualization: the three groups above,
# concatenated in that order.
zeno_distill_and_metric_functions = [
    *_zeno_text_feature_functions,
    *_zeno_score_functions,
    *_zeno_average_functions,
]