File size: 1,414 Bytes
36f36c5 d5c2b7f 36f36c5 d5c2b7f 56c8493 36f36c5 56c8493 d5c2b7f 56c8493 36f36c5 56c8493 d5c2b7f 36f36c5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 | import csv
from transformers import pipeline
# Load the model from the Hugging Face Hub (downloaded at run time).
generator = pipeline(
    task="text-generation",
    model="Qwen/Qwen2.5-1.5B-Instruct",
    device_map="auto",
    max_new_tokens=100,
)
# Read the test set mounted by the platform.
# - The context manager closes the input file once it has been parsed.
# - newline="" lets the csv module handle newlines embedded in quoted fields.
# - The explicit encoding keeps parsing independent of the host locale.
with open("/tmp/data/test.csv", newline="", encoding="utf-8") as src:
    rows = list(csv.DictReader(src))

# The system prompt is identical for every row, so it is built once.
SYSTEM_PROMPT = (
    "You are solving a linguistics olympiad puzzle. Answer concisely: "
    "for translation/fill_blanks/editing give only the answer string, "
    "for mapping give only the option letter, "
    "for classification give only the symbol."
)

total = len(rows)

# Write one prediction per input row to submission.csv with the columns
# id, pred and explanation.
with open("submission.csv", "w", newline="", encoding="utf-8") as f:
    w = csv.DictWriter(f, fieldnames=["id", "pred", "explanation"])
    w.writeheader()
    for i, r in enumerate(rows, start=1):
        # Each row must provide the columns: id, context, query and
        # question_number.
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {
                "role": "user",
                "content": f"CONTEXT:\n{r['context']}\n\nQUESTION:\n{r['query']}\n\nAnswer only item number {r['question_number']}.",
            },
        ]
        output = generator(messages)
        # The prediction is read from the "content" of the last entry of
        # "generated_text" in the first result (presumably the assistant's
        # reply appended to the chat -- confirm against the pipeline docs).
        pred = output[0]["generated_text"][-1]["content"].strip()
        print(f"[{i}/{total}] id={r['id']} → {pred[:50]}")
        w.writerow({
            "id": r["id"],
            "pred": pred,
            "explanation": "Qwen2.5-1.5B-Instruct zero-shot",
        })

print(f"Done: {total} rows written")
|