File size: 1,125 Bytes
ce712db
 
 
 
 
 
 
 
 
 
 
 
 
4fd266f
 
 
 
 
ce712db
 
 
4fd266f
ce712db
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import json

# File paths (kept as module-level names so they can still be overridden
# by editing or rebinding them before calling main()).
input_file = "answers_complete.jsonl"
output_file = "answers.jsonl"

# Marker that may precede the answer text in the input records. The slice
# length is derived from this constant instead of a hand-counted number, so
# the two can never drift apart.
ANSWER_PREFIX = "FINAL ANSWER: "


def strip_answer_prefix(model_answer):
    """Return *model_answer* without a leading ``ANSWER_PREFIX``.

    Args:
        model_answer: The raw ``model_answer`` value taken from a record.

    Returns:
        The text after the prefix when the value is a string starting with
        it; ``""`` when the value is ``None`` (a JSON ``null`` is treated
        like a missing answer); otherwise the value unchanged.
    """
    if model_answer is None:
        # A JSON null used to crash on ``.startswith``; handle it the same
        # way as an absent key, whose default is the empty string.
        return ""
    if isinstance(model_answer, str) and model_answer.startswith(ANSWER_PREFIX):
        return model_answer[len(ANSWER_PREFIX):]
    return model_answer


def convert_entry(entry):
    """Build the output record for one parsed input record.

    Only ``task_id`` and the cleaned ``model_answer`` are kept; every other
    key of *entry* is dropped.

    Raises:
        KeyError: If *entry* has no ``task_id`` key.
    """
    return {
        "task_id": entry["task_id"],
        "model_answer": strip_answer_prefix(entry.get("model_answer", "")),
    }


def read_entries(path):
    """Read the JSON Lines file at *path* and return the converted records.

    Lines that are empty or contain only whitespace are skipped.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no ``task_id`` key.
    """
    converted = []
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            text = line.strip()
            if text:
                converted.append(convert_entry(json.loads(text)))
    return converted


def write_entries(path, entries):
    """Write *entries* to *path*, one JSON object per line (UTF-8)."""
    with open(path, 'w', encoding='utf-8') as f:
        for entry in entries:
            # ensure_ascii=False keeps non-ASCII characters readable
            # instead of emitting \uXXXX escapes.
            json.dump(entry, f, ensure_ascii=False)
            f.write('\n')


def main(src=None, dst=None):
    """Convert *src* into *dst* and print a short summary.

    Args:
        src: Input JSON Lines path; defaults to the module-level
            ``input_file``.
        dst: Output JSON Lines path; defaults to the module-level
            ``output_file``.

    Returns:
        The number of records written.
    """
    src = input_file if src is None else src
    dst = output_file if dst is None else dst

    # Read and convert everything before opening the output, so a failure
    # while parsing the input never leaves a truncated output file behind.
    entries = read_entries(src)
    write_entries(dst, entries)

    print(f"Respostas copiadas de {src} para {dst}")
    print(f"Total de entradas processadas: {len(entries)}")
    return len(entries)


if __name__ == "__main__":
    main()