test_model / script.py
ritaberrada's picture
Upload 2 files
d5c2b7f verified
Raw
History Blame Contribute Delete
1.41 kB
import csv
from transformers import pipeline
# Zero-shot inference script: reads the test set, asks the model one question
# per row, and writes the answers to submission.csv (columns: id, pred,
# explanation).

# Load the model from the Hugging Face Hub (downloaded at run time).
generator = pipeline(
    "text-generation",
    model="Qwen/Qwen2.5-1.5B-Instruct",
    device_map="auto",
    max_new_tokens=100,
)

# System instructions are identical for every row, so build them once.
SYSTEM_PROMPT = "You are solving a linguistics olympiad puzzle. Answer concisely: for translation/fill_blanks/editing give only the answer string, for mapping give only the option letter, for classification give only the symbol."

# Read the test set mounted by the platform.
# - `with` guarantees the handle is closed (the bare open() leaked it).
# - newline="" is what the csv module requires so that newlines embedded in
#   quoted fields are parsed correctly.
# - NOTE(review): assumes the test set is UTF-8 encoded — confirm with the
#   platform; an explicit encoding avoids depending on the host locale.
with open("/tmp/data/test.csv", newline="", encoding="utf-8") as src:
    rows = list(csv.DictReader(src))

total = len(rows)

with open("submission.csv", "w", newline="", encoding="utf-8") as f:
    w = csv.DictWriter(f, fieldnames=["id", "pred", "explanation"])
    w.writeheader()
    for i, r in enumerate(rows, start=1):
        messages = [
            {
                "role": "system",
                "content": SYSTEM_PROMPT,
            },
            {
                "role": "user",
                "content": f"CONTEXT:\n{r['context']}\n\nQUESTION:\n{r['query']}\n\nAnswer only item number {r['question_number']}.",
            },
        ]
        output = generator(messages)
        # The last message of the returned conversation is taken as the
        # model's answer; surrounding whitespace is dropped.
        pred = output[0]["generated_text"][-1]["content"].strip()
        # Progress line: a space now separates the id from the answer
        # preview (they were previously printed fused together).
        print(f"[{i}/{total}] id={r['id']} {pred[:50]}")
        w.writerow({
            "id": r["id"],
            "pred": pred,
            "explanation": "Qwen2.5-1.5B-Instruct zero-shot",
        })

print(f"Done: {total} rows written")