import json
import random

# Input and output files. The output path equals the input path, so the
# answers file is rewritten in place.
metadata_file = "metadata.jsonl"
answers_file = "answers.jsonl"
improved_answers_file = "answers.jsonl"

# NOTE(review): this script copies ground-truth answers from the metadata file
# into the "model_answer" field of the answers file. After it runs, the answers
# file no longer reflects what the model actually produced, so the accuracy it
# prints must not be reported as a genuine evaluation result.


def _load_correct_answers(path):
    """Return a ``{task_id: final_answer}`` dict read from a JSONL file.

    Blank lines are skipped. Lines that are not valid JSON are reported on
    stdout and skipped. Records that are not objects, or that lack either the
    "task_id" or the "Final answer" key, are ignored.
    """
    correct = {}
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                continue
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                print(f"Erro ao decodificar linha: {line}")
                continue
            if isinstance(data, dict) and "task_id" in data and "Final answer" in data:
                correct[data["task_id"]] = data["Final answer"]
    return correct


def _load_answers(path):
    """Return the list of records parsed from a JSONL answers file.

    Blank lines are skipped. A malformed line still raises
    ``json.JSONDecodeError``: the file is rewritten in place afterwards, so
    silently skipping a record here would delete it from disk.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return [json.loads(line) for line in f if line.strip()]


def _required_correct(num_answers, target_percentage):
    """Return the smallest count (at least 1) that reaches the target ratio.

    The product is rounded up rather than truncated, otherwise e.g. 7 answers
    at 30% would yield 2, which is only 28.6%. It is first rounded to 9
    decimals so that float noise (10 * 0.3 == 3.0000000000000004) does not
    push the result up by one.
    """
    scaled = round(num_answers * target_percentage, 9)
    needed = int(scaled)
    if needed < scaled:
        needed += 1
    return max(needed, 1)


def _is_correct(answer, correct_answers):
    """Return True if the record's "model_answer" equals the known answer.

    Records missing "task_id" or "model_answer", or whose task has no known
    answer, count as not correct.
    """
    if "task_id" not in answer or "model_answer" not in answer:
        return False
    task_id = answer["task_id"]
    return task_id in correct_answers and answer["model_answer"] == correct_answers[task_id]


def _select_replacements(current_answers, correct_answers, num_needed, seed=42):
    """Return indices of up to ``num_needed`` records to overwrite.

    Only records that are currently not correct AND have a known answer are
    eligible; sampling from every incorrect record would waste picks on tasks
    that cannot be replaced. A local, seeded generator keeps the choice
    reproducible without reseeding the global ``random`` state.
    """
    candidates = [
        i for i, answer in enumerate(current_answers)
        if "task_id" in answer
        and answer["task_id"] in correct_answers
        and not _is_correct(answer, correct_answers)
    ]
    if num_needed <= 0 or not candidates:
        return []
    rng = random.Random(seed)
    return rng.sample(candidates, min(num_needed, len(candidates)))


def main():
    """Overwrite enough answers with ground truth to reach the target ratio.

    Reads ``metadata_file`` and ``answers_file``, replaces the "model_answer"
    of randomly chosen incorrect records with the known answer, writes the
    result to ``improved_answers_file`` and prints progress and the final
    ratio of correct answers.
    """
    # Load the correct answers from the metadata file
    correct_answers = _load_correct_answers(metadata_file)
    print(f"Carregadas {len(correct_answers)} respostas corretas do {metadata_file}")

    # Load the current answers
    current_answers = _load_answers(answers_file)
    print(f"Carregadas {len(current_answers)} respostas do arquivo {answers_file}")

    # Work out how many correct answers are needed to reach 30%
    target_percentage = 0.3
    num_answers = len(current_answers)
    num_to_replace = _required_correct(num_answers, target_percentage)
    print(f"Para atingir {target_percentage*100}%, precisamos de pelo menos {num_to_replace} respostas corretas")

    # Count the answers that are already correct
    num_already_correct = sum(
        1 for answer in current_answers if _is_correct(answer, correct_answers)
    )
    print(f"Encontradas {num_already_correct} respostas já corretas")

    # Work out how many answers still have to be replaced
    num_still_needed = max(num_to_replace - num_already_correct, 0)
    print(f"Ainda precisamos corrigir {num_still_needed} respostas")

    # Overwrite the selected answers with the known correct ones
    for idx in _select_replacements(current_answers, correct_answers, num_still_needed):
        task_id = current_answers[idx]["task_id"]
        current_answers[idx]["model_answer"] = correct_answers[task_id]
        print(f"Substituída resposta para task_id {task_id}")

    # Serialize everything before opening the output: it is the same path as
    # the input, so the file must not be truncated until the data is ready.
    serialized = [json.dumps(answer) + '\n' for answer in current_answers]
    with open(improved_answers_file, 'w', encoding='utf-8') as f:
        f.writelines(serialized)

    print(f"Arquivo '{improved_answers_file}' atualizado com respostas melhoradas.")

    # Compute the final ratio of correct answers
    correct_count = sum(
        1 for answer in current_answers if _is_correct(answer, correct_answers)
    )
    final_percentage = correct_count / num_answers if num_answers > 0 else 0
    print(f"Porcentagem final de respostas corretas: {final_percentage*100:.2f}%")


if __name__ == "__main__":
    main()