File size: 1,125 Bytes
ce712db 4fd266f ce712db 4fd266f ce712db |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
import json
# Convert the raw answers file into the output format: one JSON object per
# line holding only "task_id" and "model_answer", with a leading
# "FINAL ANSWER: " marker removed from the answer when present.

# File paths
input_file = "answers_complete.jsonl"
output_file = "answers.jsonl"

# Marker that may precede the answer text. The slice below uses
# len(ANSWER_PREFIX), so the marker and the number of characters removed
# cannot drift apart.
ANSWER_PREFIX = "FINAL ANSWER: "

# Read the original file
entries = []
with open(input_file, 'r', encoding='utf-8') as f:
    for line in f:
        line = line.strip()
        if not line:  # Skip blank lines
            continue
        entry = json.loads(line)

        # Remove the "FINAL ANSWER: " prefix from the answer. A missing key
        # falls back to "", and a non-string value (e.g. JSON null) is passed
        # through unchanged instead of crashing on .startswith().
        model_answer = entry.get("model_answer", "")
        if isinstance(model_answer, str) and model_answer.startswith(ANSWER_PREFIX):
            model_answer = model_answer[len(ANSWER_PREFIX):]

        # Build the entry in the output format; "task_id" is required and a
        # record without it raises KeyError.
        new_entry = {
            "task_id": entry["task_id"],
            "model_answer": model_answer,
        }
        entries.append(new_entry)

# Write the corrected file, one JSON object per line
with open(output_file, 'w', encoding='utf-8') as f:
    for entry in entries:
        json.dump(entry, f, ensure_ascii=False)
        f.write('\n')

print(f"Respostas copiadas de {input_file} para {output_file}")
print(f"Total de entradas processadas: {len(entries)}")