"""Convert a JSONL answers file into a trimmed ``task_id``/``model_answer`` file.

Each non-blank line of the input file is parsed as a JSON object. Only its
``task_id`` and ``model_answer`` fields are kept, and a leading
``"FINAL ANSWER: "`` is removed from the answer. The result is written as
JSON Lines to the output file.
"""
import json

# File paths
input_file = "answers_complete.jsonl"
output_file = "answers.jsonl"

# Prefix removed from the start of every answer. Kept as a single constant so
# the amount sliced off is always derived from the text that is matched.
ANSWER_PREFIX = "FINAL ANSWER: "


def strip_answer_prefix(answer, prefix=ANSWER_PREFIX):
    """Return *answer* without a leading *prefix*.

    Only one leading occurrence is removed. Values that are not strings
    (for example a JSON ``null``) are returned unchanged instead of raising.
    """
    if isinstance(answer, str) and answer.startswith(prefix):
        return answer[len(prefix):]
    return answer


def convert_entry(entry):
    """Build the output record for one parsed input record.

    Args:
        entry: Mapping parsed from one input line.

    Returns:
        A dict with exactly the keys ``task_id`` and ``model_answer``.

    Raises:
        KeyError: If *entry* has no ``task_id`` key.
    """
    return {
        "task_id": entry["task_id"],
        # A missing answer becomes an empty string.
        "model_answer": strip_answer_prefix(entry.get("model_answer", "")),
    }


def convert_lines(lines):
    """Parse an iterable of JSONL text lines into a list of output records.

    Blank and whitespace-only lines are skipped.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no ``task_id`` key.
    """
    converted = []
    for raw_line in lines:
        text = raw_line.strip()
        if not text:  # Skip empty lines
            continue
        converted.append(convert_entry(json.loads(text)))
    return converted


def main(source=input_file, destination=output_file):
    """Read *source*, write the converted records to *destination*, and report."""
    # Read the original file
    with open(source, 'r', encoding='utf-8') as f:
        entries = convert_lines(f)

    # Write the corrected file, one JSON object per line
    with open(destination, 'w', encoding='utf-8') as f:
        for entry in entries:
            json.dump(entry, f, ensure_ascii=False)
            f.write('\n')

    print(f"Respostas copiadas de {source} para {destination}")
    print(f"Total de entradas processadas: {len(entries)}")


if __name__ == "__main__":
    main()