"""LLM-based scoring of a generated commit message against a reference."""

import time

from api_wrappers import grazie_wrapper


def build_prompt(prediction, reference) -> str:
    """Return the rating prompt for one generated/reference message pair.

    Args:
        prediction: The generated commit message to be rated.
        reference: The reference commit message for the same commit.

    Returns:
        The prompt text, ending with the ``OUTPUT`` token after which the
        model is asked to print a single integer rating from 1 to 5.
    """
    # NOTE(review): the pieces below are joined with single spaces, exactly
    # as the text appears in the source this was restored from; confirm
    # whether line breaks between the sections were originally intended.
    return (
        "Your task is to rate the quality of the generated commit message "
        "using the scale from 1 to 5. "
        "A good commit message has to be concise. "
        "Assign lower scores for the commit messages that are too verbose "
        "for a commit message. "
        "The generated commit message you have to evaluate: "
        "START OF THE GENERATED COMMIT MESSAGE "
        f"{prediction} "
        "END OF THE GENERATED COMMIT MESSAGE "
        "Here is an example of an ideal reference commit message for the "
        "same commit: "
        "START OF THE REFERENCE COMMIT MESSAGE "
        f"{reference} "
        "END OF THE REFERENCE COMMIT MESSAGE "
        "All the information in the reference commit message is true. "
        'Print only one integer number after the token "OUTPUT" - '
        "the rating of the generated commit message. "
        "Do not print anything that is not an integer. "
        "OUTPUT "
    )


# Maximum number of generation attempts before compute() gives up.
N_RETRIES = 3


def compute(prediction, reference) -> int:
    """Ask the model to rate ``prediction`` and return the rating.

    The prompt is sent up to ``N_RETRIES`` times. The last character of each
    stripped response is parsed as an integer, and the first response that
    parses is returned.

    Args:
        prediction: The generated commit message to be rated.
        reference: The reference commit message for the same commit.

    Returns:
        The integer parsed from the last character of the model response.

    Raises:
        RuntimeError: If no attempt produced a response ending in a digit.
            The message lists every stripped response that was received.
    """
    prompt = build_prompt(prediction, reference)
    outputs = []
    for _ in range(N_RETRIES):
        output = grazie_wrapper.generate_for_prompt(prompt).strip()
        # Keep the whole response so a failure can actually be diagnosed.
        outputs.append(output)
        try:
            # Slicing (not indexing) makes an empty response yield "", which
            # int() rejects with ValueError, so the attempt is retried
            # instead of crashing with IndexError.
            return int(output[-1:])
        except ValueError:
            continue
    raise RuntimeError(f"GPT4 cannot generate a number. Its outputs were: {outputs}")