import time

from api_wrappers import grazie_wrapper


def build_prompt(prediction, reference):
    """Assemble the prompt asking the model to rate a generated commit message.

    The prompt asks for a rating from 1 to 5, tells the model to penalise
    verbose messages, embeds both messages between START/END markers, and
    ends with the "OUTPUT" token after which a single integer is expected.

    Args:
        prediction: The generated commit message to be rated; formatted into
            the prompt as-is.
        reference: The reference commit message for the same commit;
            formatted into the prompt as-is.

    Returns:
        The complete prompt text, ending with ``"OUTPUT\\n"``.
    """
    # One entry per prompt line. Trailing spaces on some lines are part of
    # the prompt text and are kept exactly as they are.
    prompt_lines = (
        "Your task is to rate the quality of the generated commit message using the scale from 1 to 5.",
        "",
        "A good commit message has to be concise. ",
        "Assign lower scores for the commit messages that are too verbose for a commit message.",
        "",
        "The generated commit message you have to evaluate:",
        "START OF THE GENERATED COMMIT MESSAGE ",
        f"{prediction}",
        "END OF THE GENERATED COMMIT MESSAGE",
        "",
        "Here is an example of an ideal reference commit message for the same commit:",
        "START OF THE REFERENCE COMMIT MESSAGE",
        f"{reference}",
        "END OF THE REFERENCE COMMIT MESSAGE",
        "",
        "All the information in the reference commit message is true. ",
        "",
        'Print only one integer number after the token "OUTPUT" - the rating of the generated commit message.',
        "Do not print anything that is not an integer.",
        "",
        "OUTPUT",
        "",  # the final empty entry yields the newline after "OUTPUT"
    )
    return "\n".join(prompt_lines)


# Total number of attempts `compute` makes to obtain a parseable rating
# (it loops over range(N_RETRIES)) before raising an error.
N_RETRIES = 3


def compute(prediction, reference) -> int:
    """Ask the model to rate a generated commit message and return the rating.

    The prompt from ``build_prompt`` is sent through
    ``grazie_wrapper.generate_for_prompt`` up to ``N_RETRIES`` times. The
    first reply whose last non-whitespace character parses as an integer
    is returned; replies that do not (including empty ones) trigger a retry.

    Args:
        prediction: The generated commit message to evaluate.
        reference: The reference commit message for the same commit.

    Returns:
        The integer parsed from the final character of the model's reply.

    Raises:
        RuntimeError: If none of the ``N_RETRIES`` attempts produced a reply
            ending in a digit. The message lists every reply received.
    """
    prompt = build_prompt(prediction, reference)
    outputs = []

    for _ in range(N_RETRIES):
        output = grazie_wrapper.generate_for_prompt(prompt).strip()
        # Record the whole reply (not just its last character) so the error
        # raised after all attempts fail shows what the model actually said.
        outputs.append(output)
        try:
            # Slicing instead of indexing: an empty reply yields "" and
            # int("") raises ValueError, so it is retried like any other
            # unparseable reply instead of escaping as IndexError.
            return int(output[-1:])
        except ValueError:
            continue

    raise RuntimeError(f"GPT4 cannot generate a number. Its outputs were: {outputs}")