Add Lightning.ai multi-GPU notebook with Ollama + Qwen2.5-Coder (auto-shards across GPUs)
notebooks/pemf_llm_lightning.ipynb
ADDED
@@ -0,0 +1,303 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# PEMF ARC-AGI — LLM Solver (Lightning.ai / Multi-GPU)\n",
+    "\n",
+    "Runs Ollama with automatic multi-GPU sharding for local inference.\n",
+    "\n",
+    "| GPU config | Model | VRAM | Quality |\n",
+    "|---|---|---|---|\n",
+    "| 2xA10G (48 GB) | qwen2.5-coder:32b | ~20 GB (q4) | Best |\n",
+    "| 2xL4 (48 GB) | qwen2.5-coder:32b | ~20 GB (q4) | Best |\n",
+    "| 2xT4 (32 GB) | qwen2.5-coder:14b | ~10 GB (q4) | Good |\n",
+    "| 1xA10G (24 GB) | qwen2.5-coder:14b | ~10 GB (q4) | Good |\n",
+    "| 4xA10G (96 GB) | qwen2.5-coder:32b (fp16) | ~65 GB | Best, fastest |"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ============ CONFIGURATION ============\n",
+    "MODEL = 'qwen2.5-coder:32b'\n",
+    "# MODEL = 'qwen2.5-coder:14b'  # fallback for less VRAM\n",
+    "N_CANDIDATES = 8  # candidate programs sampled per task, at increasing temperature"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import subprocess, os, time, json, re, glob\n",
+    "import urllib.request\n",
+    "import numpy as np\n",
+    "from collections import Counter, deque\n",
+    "\n",
+    "# Check GPUs (guard against counting 1 GPU when nvidia-smi returns nothing)\n",
+    "!nvidia-smi --query-gpu=index,name,memory.total --format=csv,noheader\n",
+    "gpu_list = subprocess.run(['nvidia-smi','-L'], capture_output=True, text=True).stdout.strip()\n",
+    "gpu_count = len(gpu_list.split('\\n')) if gpu_list else 0\n",
+    "print(f'GPUs: {gpu_count}')"
+   ]
+  },
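+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Optional: pick the model from the VRAM actually detected, per the table above. This is a sketch with approximate thresholds (the 32b q4 model wants roughly 40 GB of headroom across GPUs); it overrides the CONFIGURATION cell, so skip it to keep the manual choice."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch: auto-select MODEL from total detected VRAM (thresholds approximate).\n",
+    "out = subprocess.run(['nvidia-smi','--query-gpu=memory.total','--format=csv,noheader,nounits'],\n",
+    "                     capture_output=True, text=True).stdout.strip()\n",
+    "total_vram_gb = sum(int(x) for x in out.split('\\n') if x) / 1024 if out else 0\n",
+    "MODEL = 'qwen2.5-coder:32b' if total_vram_gb >= 40 else 'qwen2.5-coder:14b'\n",
+    "print(f'{total_vram_gb:.0f} GB total VRAM -> {MODEL}')"
+   ]
+  },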
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Install Ollama if missing\n",
+    "try:\n",
+    "    subprocess.run(['ollama','--version'], capture_output=True, check=True)\n",
+    "    print('Ollama already installed')\n",
+    "except (FileNotFoundError, subprocess.CalledProcessError):\n",
+    "    !curl -fsSL https://ollama.com/install.sh | sh\n",
+    "\n",
+    "# Start server (Ollama auto-detects all visible GPUs and shards large models)\n",
+    "subprocess.run(['pkill','-f','ollama'], capture_output=True)\n",
+    "time.sleep(2)\n",
+    "env = os.environ.copy()\n",
+    "env['CUDA_VISIBLE_DEVICES'] = ','.join(str(i) for i in range(gpu_count))\n",
+    "server = subprocess.Popen(['ollama','serve'],\n",
+    "    stdout=open('/tmp/ollama.log','w'), stderr=subprocess.STDOUT, env=env)\n",
+    "time.sleep(5)\n",
+    "print(f'Server PID {server.pid}, GPUs: {env[\"CUDA_VISIBLE_DEVICES\"]}')\n",
+    "\n",
+    "# Pull model; fall back to 14b if the 32b pull fails\n",
+    "print(f'Pulling {MODEL}...')\n",
+    "r = subprocess.run(['ollama','pull',MODEL], capture_output=True, text=True, timeout=3600)\n",
+    "if r.returncode != 0:\n",
+    "    print('Pull failed, trying 14b...')\n",
+    "    MODEL = 'qwen2.5-coder:14b'\n",
+    "    subprocess.run(['ollama','pull',MODEL], check=True, capture_output=True, text=True, timeout=3600)\n",
+    "print(f'{MODEL} ready')\n",
+    "\n",
+    "# Smoke test, then show per-GPU memory to confirm sharding\n",
+    "r = subprocess.run(['ollama','run',MODEL,'Say hello'], capture_output=True, text=True, timeout=60)\n",
+    "print(f'Test: {r.stdout.strip()[:80]}')\n",
+    "!nvidia-smi --query-gpu=index,memory.used,memory.total --format=csv,noheader"
+   ]
+  },
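+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Optional sanity check, a minimal sketch: recent Ollama builds expose a `/api/ps` endpoint listing loaded models and how much of each sits in VRAM, which is a quick way to confirm the model sharded onto the GPUs instead of spilling to CPU. If this build lacks the endpoint, the cell just prints the error."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch: ask the server what is loaded and where (endpoint availability varies by Ollama version)\n",
+    "try:\n",
+    "    with urllib.request.urlopen('http://localhost:11434/api/ps', timeout=10) as resp:\n",
+    "        for m in json.loads(resp.read().decode()).get('models', []):\n",
+    "            print(f\"{m.get('name')}: {m.get('size_vram', 0)/1e9:.1f} GB in VRAM of {m.get('size', 0)/1e9:.1f} GB total\")\n",
+    "except Exception as e:\n",
+    "    print(f'/api/ps unavailable or server not up: {e}')"
+   ]
+  },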
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Download ARC data\n",
+    "if not os.path.exists('arc_data/training'):\n",
+    "    !git clone --depth 1 https://github.com/fchollet/ARC-AGI.git /tmp/arc\n",
+    "    os.makedirs('arc_data', exist_ok=True)\n",
+    "    !cp -r /tmp/arc/data/training arc_data/training\n",
+    "print(f'Tasks: {len(glob.glob(\"arc_data/training/*.json\"))}')\n",
+    "\n",
+    "# Tasks the symbolic solver already handles; the LLM only sees the rest\n",
+    "ALREADY_SOLVED = {\n",
+    "    '007bbfb7','00d62c1b','0d3d703e','1190e5a7','1cf80156','1e0a9b12','1f85a75f',\n",
+    "    '2013d3e2','22168020','22eb0ac0','239be575','23b5c85d','28bf18c6','2dee498d',\n",
+    "    '3618c87e','3906de3d','3aa6fb7a','3af2c5a8','3c9b0459','42a50994','4347f46a',\n",
+    "    '50cb2852','6150a2bd','62c24649','67385a82','67a3c6ac','67e8384a','68b16354',\n",
+    "    '6d0aefbc','6f8cd79b','6fa7a44f','746b3537','74dd1130','7b7f7511','7e0986d6',\n",
+    "    '7f4411dc','868de0fa','8be77c9e','8d5021e8','91714a58','9172f3a0','9565186b',\n",
+    "    '9dfd6313','a416b8f3','a5313dff','a699fb00','aabf363d','aedd82e4','b1948b0a',\n",
+    "    'b6afb2da','ba97ae07','bb43febb','bda2d7a6','be94b721','c0f76784','c59eb873',\n",
+    "    'c8f0f002','c9e6f938','d10ecb37','d23f8c26','d511f180','d631b094','d90796e8',\n",
+    "    'd9fac9be','de1cd16c','ded97339','e26a3af2','eb5a1d5d','ed36ccf7','f76d97a5',\n",
+    "}\n",
+    "task_files = sorted(glob.glob('arc_data/training/*.json'))\n",
+    "unsolved = [(os.path.basename(f).replace('.json',''), f) for f in task_files\n",
+    "            if os.path.basename(f).replace('.json','') not in ALREADY_SOLVED]\n",
+    "print(f'Symbolic: {len(ALREADY_SOLVED)}, LLM to try: {len(unsolved)}')"
+   ]
+  },
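+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Optional: eyeball a task before prompting. A sketch assuming `matplotlib` is installed (`pip install matplotlib` otherwise); the palette below is the conventional ARC color scheme."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch: render a task's train pairs (assumes matplotlib is available)\n",
+    "import matplotlib.pyplot as plt\n",
+    "from matplotlib.colors import ListedColormap\n",
+    "\n",
+    "ARC_CMAP = ListedColormap(['#000000','#0074D9','#FF4136','#2ECC40','#FFDC00',\n",
+    "                           '#AAAAAA','#F012BE','#FF851B','#7FDBFF','#870C25'])\n",
+    "\n",
+    "def show_task(task_file):\n",
+    "    with open(task_file) as f: task = json.load(f)\n",
+    "    pairs = task['train']\n",
+    "    fig, axes = plt.subplots(2, len(pairs), figsize=(3*len(pairs), 6), squeeze=False)\n",
+    "    for j, p in enumerate(pairs):\n",
+    "        for row, key in enumerate(('input', 'output')):\n",
+    "            axes[row][j].imshow(np.array(p[key]), cmap=ARC_CMAP, vmin=0, vmax=9)\n",
+    "            axes[row][j].set_title(f'{key} {j+1}'); axes[row][j].axis('off')\n",
+    "    plt.tight_layout(); plt.show()\n",
+    "\n",
+    "show_task(unsolved[0][1])"
+   ]
+  },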
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# LLM engine\n",
+    "def call_ollama(prompt, model, temperature=0.7):\n",
+    "    payload = {'model': model, 'prompt': prompt, 'stream': False,\n",
+    "               'options': {'temperature': temperature, 'num_predict': 2048}}\n",
+    "    req = urllib.request.Request('http://localhost:11434/api/generate',\n",
+    "        data=json.dumps(payload).encode(), headers={'Content-Type': 'application/json'}, method='POST')\n",
+    "    try:\n",
+    "        with urllib.request.urlopen(req, timeout=180) as resp:\n",
+    "            return json.loads(resp.read().decode()).get('response', '')\n",
+    "    except Exception as e:\n",
+    "        return f'ERROR: {e}'\n",
+    "\n",
+    "def build_prompt(task):\n",
+    "    pairs = task.get('train', [])\n",
+    "    ex = '\\n'.join(f\"Example {i+1}:\\n  Input: {json.dumps(p['input'])}\\n  Output: {json.dumps(p['output'])}\"\n",
+    "                   for i, p in enumerate(pairs))\n",
+    "    inps = [np.array(p['input']) for p in pairs]\n",
+    "    outs = [np.array(p['output']) for p in pairs]\n",
+    "    same = all(i.shape == o.shape for i, o in zip(inps, outs))\n",
+    "    ic = sorted(set(c for i in inps for c in np.unique(i).tolist()))\n",
+    "    oc = sorted(set(c for o in outs for c in np.unique(o).tolist()))\n",
+    "    a = f\"  Same shape: {same}\\n  Colors in: {ic}, out: {oc}\\n\"\n",
+    "    if not same: a += f\"  Shape: {inps[0].shape} -> {outs[0].shape}\\n\"\n",
+    "    return f\"\"\"Solve this ARC-AGI puzzle. Write ONLY a Python function, no explanations.\n",
+    "\n",
+    "{ex}\n",
+    "\n",
+    "Analysis:\n",
+    "{a}\n",
+    "```python\n",
+    "import numpy as np\n",
+    "from collections import Counter, deque\n",
+    "\n",
+    "def transform(grid: list[list[int]]) -> list[list[int]]:\n",
+    "    grid = np.array(grid)\n",
+    "\"\"\"\n",
+    "\n",
+    "def extract_code(resp):\n",
+    "    # Prefer fenced blocks; fall back to scanning for the function definition\n",
+    "    for pat in [r'```python\\s*(.*?)```', r'```\\s*(.*?)```']:\n",
+    "        for m in re.findall(pat, resp, re.DOTALL):\n",
+    "            if 'def transform' in m: return m.strip()\n",
+    "    idx = resp.find('def transform')\n",
+    "    if idx >= 0:\n",
+    "        before = resp[:idx]\n",
+    "        s = max(before.rfind('import '), before.rfind('from '))\n",
+    "        code = resp[s if s >= 0 else idx:]\n",
+    "        end = code.find('```')\n",
+    "        if end > 0: code = code[:end]\n",
+    "        return code.strip()\n",
+    "    s = resp.strip()\n",
+    "    if s.startswith(('import', 'def transform', 'from')): return s\n",
+    "    return None\n",
+    "\n",
+    "def _exec_ns():\n",
+    "    # Namespace the generated programs run in; scipy is optional\n",
+    "    ns = {'np': np, 'numpy': np, 'Counter': Counter, 'deque': deque}\n",
+    "    try:\n",
+    "        import scipy, scipy.ndimage; ns['scipy'] = scipy\n",
+    "    except ImportError:\n",
+    "        pass\n",
+    "    return ns\n",
+    "\n",
+    "def verify(code, pairs):\n",
+    "    ns = _exec_ns()\n",
+    "    try: exec(code, ns)\n",
+    "    except Exception: return False\n",
+    "    if 'transform' not in ns: return False\n",
+    "    fn = ns['transform']\n",
+    "    for p in pairs:\n",
+    "        try:\n",
+    "            r = np.array(fn([row[:] for row in p['input']]), dtype=int)\n",
+    "            e = np.array(p['output'], dtype=int)\n",
+    "            if r.shape != e.shape or not np.array_equal(r, e): return False\n",
+    "        except Exception: return False\n",
+    "    return True\n",
+    "\n",
+    "def apply_prog(code, inp):\n",
+    "    ns = _exec_ns()\n",
+    "    try:\n",
+    "        exec(code, ns)\n",
+    "        r = ns['transform']([row[:] for row in inp])\n",
+    "        if r is not None: return np.array(r, dtype=int).tolist()\n",
+    "    except Exception: pass\n",
+    "    return None\n",
+    "\n",
+    "print('Engine ready')"
+   ]
+  },
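+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Caveat: `verify` and `apply_prog` `exec` model-written code in-process, so a candidate containing an accidental `while True:` hangs the whole run. A minimal hardening sketch (not wired into the loop below): run verification in a child process with a hard timeout. `verify_with_timeout` is an illustrative name, and the sketch assumes the default `fork` start method on Linux."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch: hard-timeout wrapper around verify(), so non-terminating candidates\n",
+    "# are treated as failures instead of stalling the notebook.\n",
+    "import multiprocessing as mp\n",
+    "\n",
+    "def _verify_worker(code, pairs, q):\n",
+    "    q.put(verify(code, pairs))\n",
+    "\n",
+    "def verify_with_timeout(code, pairs, timeout_s=10):\n",
+    "    q = mp.Queue()\n",
+    "    p = mp.Process(target=_verify_worker, args=(code, pairs, q))\n",
+    "    p.start(); p.join(timeout_s)\n",
+    "    if p.is_alive():\n",
+    "        p.terminate(); p.join()\n",
+    "        return False  # timeout -> failed candidate\n",
+    "    return q.get() if not q.empty() else False"
+   ]
+  },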
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Quick single-task test to gauge latency before the full run\n",
+    "tid0, tf0 = unsolved[0]\n",
+    "with open(tf0) as f: t = json.load(f)\n",
+    "print(f'Test on {tid0}...')\n",
+    "s = time.time(); r = call_ollama(build_prompt(t), MODEL, 0.1); e = time.time() - s\n",
+    "code = extract_code(r)\n",
+    "if code: print(f'{e:.1f}s, {len(code)} chars, verified: {\"Y\" if verify(code, t[\"train\"]) else \"N\"}')\n",
+    "else: print(f'{e:.1f}s, no code')\n",
+    "est = e * N_CANDIDATES * len(unsolved) / 3600\n",
+    "print(f'Est total (upper bound; solved tasks stop early): {est:.1f}h for {len(unsolved)} tasks x {N_CANDIDATES} candidates')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# === MAIN LOOP (crash-safe, resumable) ===\n",
+    "results = {}\n",
+    "solved = 0\n",
+    "total_time = 0\n",
+    "\n",
+    "if os.path.exists('llm_results.json'):\n",
+    "    with open('llm_results.json') as f: prev = json.load(f)\n",
+    "    results = prev.get('results', {})\n",
+    "    solved = sum(1 for r in results.values() if r['status'] == 'solved')\n",
+    "    total_time = prev.get('total_time_s', 0)\n",
+    "    print(f'Resuming: {solved} LLM-solved, {len(results)} attempted')\n",
+    "\n",
+    "for idx, (tid, tf) in enumerate(unsolved):\n",
+    "    if tid in results: continue\n",
+    "    with open(tf) as f: task = json.load(f)\n",
+    "    print(f'[{idx+1:3d}/{len(unsolved)}] {tid}:', end=' ', flush=True)\n",
+    "    s = time.time(); prompt = build_prompt(task); ok = False\n",
+    "    for i in range(N_CANDIDATES):\n",
+    "        temp = 0.1 if i == 0 else min(0.4 + 0.15*i, 1.2)\n",
+    "        resp = call_ollama(prompt, MODEL, temp)\n",
+    "        if resp.startswith('ERROR:'): continue\n",
+    "        code = extract_code(resp)\n",
+    "        if code and verify(code, task['train']):\n",
+    "            e = time.time() - s; total_time += e; solved += 1\n",
+    "            to = [apply_prog(code, t['input']) for t in task.get('test', [])]\n",
+    "            results[tid] = {'status': 'solved', 'rule': f'llm_c{i+1}', 'code': code,\n",
+    "                            'test_outputs': to, 'time_s': round(e, 2)}\n",
+    "            print(f'✅ c{i+1} ({e:.1f}s) [{len(ALREADY_SOLVED)+solved}/{len(task_files)}]')\n",
+    "            ok = True; break\n",
+    "    if not ok:\n",
+    "        e = time.time() - s; total_time += e\n",
+    "        results[tid] = {'status': 'failed', 'time_s': round(e, 2)}\n",
+    "        print(f'❌ ({e:.1f}s)')\n",
+    "    if (idx+1) % 5 == 0 or ok:  # checkpoint every 5 tasks and on every solve\n",
+    "        with open('llm_results.json', 'w') as f:\n",
+    "            json.dump({'model': MODEL, 'n_candidates': N_CANDIDATES, 'llm_solved': solved,\n",
+    "                       'attempted': len(results), 'symbolic_solved': len(ALREADY_SOLVED),\n",
+    "                       'total_solved': len(ALREADY_SOLVED)+solved, 'total_tasks': len(task_files),\n",
+    "                       'solve_rate': round(100*(len(ALREADY_SOLVED)+solved)/len(task_files), 2),\n",
+    "                       'total_time_s': round(total_time, 1), 'results': results}, f, indent=2)"
+   ]
+  },
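+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The loop above samples candidates sequentially. Ollama can serve several generations concurrently when the server is started with `OLLAMA_NUM_PARALLEL` set (each parallel slot costs extra VRAM); the sketch below fans one task's candidates out over a thread pool under that assumption. It is an optional variant, not what the main loop uses."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch: concurrent candidate sampling for one task. Assumes the server was\n",
+    "# launched with env['OLLAMA_NUM_PARALLEL'] = '4' in the setup cell; without it,\n",
+    "# requests simply queue server-side and this degrades to the sequential loop.\n",
+    "from concurrent.futures import ThreadPoolExecutor\n",
+    "\n",
+    "def first_verified(task, n=N_CANDIDATES, workers=4):\n",
+    "    prompt = build_prompt(task)\n",
+    "    temps = [0.1] + [min(0.4 + 0.15*i, 1.2) for i in range(1, n)]\n",
+    "    with ThreadPoolExecutor(max_workers=workers) as pool:\n",
+    "        for resp in pool.map(lambda t: call_ollama(prompt, MODEL, t), temps):\n",
+    "            code = extract_code(resp)\n",
+    "            if code and verify(code, task['train']):\n",
+    "                return code\n",
+    "    return None"
+   ]
+  },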
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Final save + summary\n",
+    "with open('llm_results.json', 'w') as f:\n",
+    "    json.dump({'model': MODEL, 'n_candidates': N_CANDIDATES, 'llm_solved': solved,\n",
+    "               'attempted': len(results), 'symbolic_solved': len(ALREADY_SOLVED),\n",
+    "               'total_solved': len(ALREADY_SOLVED)+solved, 'total_tasks': len(task_files),\n",
+    "               'solve_rate': round(100*(len(ALREADY_SOLVED)+solved)/len(task_files), 2),\n",
+    "               'total_time_s': round(total_time, 1), 'results': results}, f, indent=2)\n",
+    "\n",
+    "print(f'\\n{\"=\"*60}')\n",
+    "print(f'LLM solved: {solved}')\n",
+    "print(f'Symbolic: {len(ALREADY_SOLVED)}')\n",
+    "print(f'TOTAL: {len(ALREADY_SOLVED)+solved}/{len(task_files)} ({100*(len(ALREADY_SOLVED)+solved)/len(task_files):.1f}%)')\n",
+    "print(f'Time: {total_time/3600:.1f}h')\n",
+    "print('\\nDownload llm_results.json, then run:')\n",
+    "print('  python scripts/merge_results.py arc_results/summary_v4.json llm_results.json')\n",
+    "\n",
+    "subprocess.run(['pkill','-f','ollama'], capture_output=True)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"},
+  "language_info": {"name": "python", "version": "3.10.0"}
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}