File size: 1,414 Bytes
36f36c5
d5c2b7f
36f36c5
d5c2b7f
 
 
 
 
 
 
56c8493
 
36f36c5
 
 
 
 
56c8493
 
d5c2b7f
 
 
 
 
 
 
 
 
 
 
 
 
56c8493
 
36f36c5
 
56c8493
d5c2b7f
36f36c5
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import csv
from transformers import pipeline

# Build the chat text-generation pipeline. The model weights are fetched from
# the Hugging Face Hub when this script runs, not bundled with it.
generator = pipeline(
    task="text-generation",
    model="Qwen/Qwen2.5-1.5B-Instruct",
    max_new_tokens=100,  # cap on the length of each generated answer
    device_map="auto",
)

# Read the test set mounted by the platform into a list of dicts, one per row.
# The context manager closes the file deterministically; newline="" is what
# the csv module requires so quoted fields with embedded newlines parse
# correctly, and the explicit encoding keeps non-ASCII puzzle text from
# depending on the host locale.
with open("/tmp/data/test.csv", newline="", encoding="utf-8") as test_file:
    rows = list(csv.DictReader(test_file))

# Run the model once per test row and write the predictions to submission.csv
# with the columns id, pred, explanation.
# The output is written as UTF-8 explicitly: answers may contain non-ASCII
# characters, which would otherwise fail or be mis-encoded depending on the
# host locale.
with open("submission.csv", "w", newline="", encoding="utf-8") as f:
    w = csv.DictWriter(f, fieldnames=["id", "pred", "explanation"])
    w.writeheader()

    # Loop-invariant values, built once instead of on every iteration.
    total = len(rows)
    system_message = {
        "role": "system",
        "content": "You are solving a linguistics olympiad puzzle. Answer concisely: for translation/fill_blanks/editing give only the answer string, for mapping give only the option letter, for classification give only the symbol."
    }

    for i, r in enumerate(rows, start=1):
        messages = [
            system_message,
            {
                "role": "user",
                "content": f"CONTEXT:\n{r['context']}\n\nQUESTION:\n{r['query']}\n\nAnswer only item number {r['question_number']}."
            },
        ]

        output = generator(messages)
        # The last message of the returned conversation is taken as the
        # model's reply; surrounding whitespace is dropped.
        pred = output[0]["generated_text"][-1]["content"].strip()
        # Progress log: the id and a 50-character preview of the prediction,
        # now separated so they no longer run together.
        print(f"[{i}/{total}] id={r['id']} -> {pred[:50]}")

        w.writerow({
            "id": r["id"],
            "pred": pred,
            "explanation": "Qwen2.5-1.5B-Instruct zero-shot",
        })

print(f"Done: {len(rows)} rows written")