"""Zero-shot baseline for linguistics-olympiad puzzles using Qwen2.5-1.5B-Instruct.

Reads the test items from ``/tmp/data/test.csv``, asks the model for one
answer per row, and writes ``submission.csv`` with the columns ``id``,
``pred`` and ``explanation``. Progress is printed to stdout row by row.
"""

import csv

from transformers import pipeline

# Instruction sent as the system message of every request. It never changes,
# so it is defined once here instead of being rebuilt inside the loop.
SYSTEM_PROMPT = "You are solving a linguistics olympiad puzzle. Answer concisely: for translation/fill_blanks/editing give only the answer string, for mapping give only the option letter, for classification give only the symbol."

# Text-generation pipeline used for every row; replies are capped at 100 new
# tokens and device placement is left to ``device_map="auto"``.
generator = pipeline(
    "text-generation",
    model="Qwen/Qwen2.5-1.5B-Instruct",
    device_map="auto",
    max_new_tokens=100,
)

# Load every row up front so the total is known for the progress output.
# newline="" lets the csv module handle line breaks inside quoted fields, and
# the explicit encoding keeps non-ASCII puzzle text independent of the locale.
# The ``with`` block guarantees the input handle is closed after reading.
with open("/tmp/data/test.csv", newline="", encoding="utf-8") as src:
    rows = list(csv.DictReader(src))

# Explicit UTF-8 so predictions containing non-ASCII characters can always be
# written, whatever the platform's default encoding is.
with open("submission.csv", "w", newline="", encoding="utf-8") as f:
    w = csv.DictWriter(f, fieldnames=["id", "pred", "explanation"])
    w.writeheader()

    for i, r in enumerate(rows, start=1):
        messages = [
            {
                "role": "system",
                "content": SYSTEM_PROMPT,
            },
            {
                "role": "user",
                "content": f"CONTEXT:\n{r['context']}\n\nQUESTION:\n{r['query']}\n\nAnswer only item number {r['question_number']}.",
            },
        ]

        output = generator(messages)
        # The reply is taken from the last message of the returned
        # conversation, with surrounding whitespace removed.
        pred = output[0]["generated_text"][-1]["content"].strip()
        print(f"[{i}/{len(rows)}] id={r['id']} → {pred[:50]}")

        w.writerow({
            "id": r["id"],
            "pred": pred,
            "explanation": "Qwen2.5-1.5B-Instruct zero-shot",
        })

print(f"Done: {len(rows)} rows written")