Spaces:

ycwhencpp
/

final-iteration

Paused

App Files Files Community

vaibhav12332112312 committed on 13 days ago

Commit

a1be3fe

1 Parent(s): 6c01076

update

Browse files

Files changed (1) hide show

training/train_grpo.ipynb +73 -61

training/train_grpo.ipynb CHANGED Viewed

@@ -23,27 +23,41 @@
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "# Cell 1: Install dependencies\n",
         "!pip install -q torch torchvision torchaudio\n",
-        "!pip install -q transformers>=4.40.0 accelerate peft>=0.10.0 trl>=0.8.0 datasets bitsandbytes\n",
         "!pip install -q matplotlib pandas\n",
         "!pip install -q pydantic httpx\n",
         "!pip install -q \"openenv-core[core]>=0.2.2\""
-      ],
-      "execution_count": null,
-      "outputs": []
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "# Cell 2: Clone the repo and set up paths\n",
         "import os, sys\n",
         "REPO_DIR = \"/content/viral-posts-env\"\n",
         "if not os.path.exists(REPO_DIR):\n",
-        "    !git clone https://github.com/VaibhavKhandare/viral-posts-env.git {REPO_DIR}\n",
         "os.chdir(REPO_DIR)\n",
         "sys.path.insert(0, REPO_DIR)\n",
         "\n",
@@ -51,13 +65,13 @@
         "os.makedirs(PLOTS_DIR, exist_ok=True)\n",
         "print(f\"Working dir: {os.getcwd()}\")\n",
         "print(f\"Plots dir: {PLOTS_DIR}\")"
-      ],
-      "execution_count": null,
-      "outputs": []
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "# Cell 3: Imports\n",
         "import json, random, time, textwrap, copy\n",
@@ -84,9 +98,7 @@
         "\n",
         "print(f\"GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'}\")\n",
         "print(f\"Tags: {len(TAG_POOL)}, Topics: {len(ALL_TOPICS)}, Horizon: {TASK_HORIZON} days\")"
-      ],
-      "execution_count": null,
-      "outputs": []
     },
     {
       "cell_type": "markdown",
@@ -99,7 +111,9 @@
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "# Cell 4: Define heuristic agents + episode runner\n",
         "_rng = random.Random(42)\n",
@@ -176,13 +190,13 @@
         "            \"rewards\": rewards, \"energies\": energies}\n",
         "\n",
         "print(\"Agents and episode runner defined.\")"
-      ],
-      "execution_count": null,
-      "outputs": []
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "# Cell 5: Run baselines\n",
         "print(\"Running heuristic baselines (5 agents × 3 tasks)...\")\n",
@@ -205,13 +219,13 @@
         "for name in BASELINE_AGENTS:\n",
         "    scores = [baseline_results[name][t][\"grader_score\"] for t in TASKS]\n",
         "    print(f\"{name:<14s} {scores[0]:>10.4f} {scores[1]:>12.4f} {scores[2]:>14.4f} {sum(scores)/3:>8.4f}\")"
-      ],
-      "execution_count": null,
-      "outputs": []
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "# Cell 6: Baseline plots\n",
         "fig, axes = plt.subplots(1, 3, figsize=(16, 5), sharey=True)\n",
@@ -229,9 +243,7 @@
         "fig.tight_layout()\n",
         "fig.savefig(f\"{PLOTS_DIR}/baseline_leaderboard.png\", dpi=150, bbox_inches='tight')\n",
         "plt.show()"
-      ],
-      "execution_count": null,
-      "outputs": []
     },
     {
       "cell_type": "markdown",
@@ -244,7 +256,9 @@
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "# Cell 7: Load model\n",
         "from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig\n",
@@ -268,13 +282,13 @@
         "model.eval()\n",
         "print(f\"Model loaded. Device: {model.device}\")\n",
         "print(f\"Memory: {torch.cuda.memory_allocated()/1e9:.1f} GB\")"
-      ],
-      "execution_count": null,
-      "outputs": []
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "# Cell 8: LLM agent functions\n",
         "SYSTEM_PROMPT = textwrap.dedent(\"\"\"\\\n",
@@ -390,9 +404,7 @@
         "            \"burned_out\": obs.creator_energy <= 0}\n",
         "\n",
         "print(\"LLM agent functions defined.\")"
-      ],
-      "execution_count": null,
-      "outputs": []
     },
     {
       "cell_type": "markdown",
@@ -405,7 +417,9 @@
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "# Cell 9: Run untrained model\n",
         "print(\"Running UNTRAINED base model on all tasks...\")\n",
@@ -422,9 +436,7 @@
         "print(\"BEFORE TRAINING:\")\n",
         "for t in TASKS:\n",
         "    print(f\"  {t}: grader={before_results[t]['grader_score']:.4f}\")"
-      ],
-      "execution_count": null,
-      "outputs": []
     },
     {
       "cell_type": "markdown",
@@ -443,7 +455,9 @@
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "# Cell 10: Attach LoRA adapter\n",
         "from peft import LoraConfig, get_peft_model, TaskType\n",
@@ -458,13 +472,13 @@
         "model.enable_input_require_grads()\n",
         "peft_model = get_peft_model(model, lora_config)\n",
         "peft_model.print_trainable_parameters()"
-      ],
-      "execution_count": null,
-      "outputs": []
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "# Cell 11: Training loop\n",
         "from trl import SFTTrainer, SFTConfig\n",
@@ -529,14 +543,14 @@
         "        warmup_steps=5,\n",
         "        logging_steps=5,\n",
         "        save_strategy=\"no\",\n",
-        "        max_seq_length=1024,\n",
         "        fp16=True,\n",
         "        report_to=\"none\",\n",
         "    )\n",
         "\n",
         "    peft_model.train()\n",
         "    trainer = SFTTrainer(\n",
-        "        model=peft_model, tokenizer=tokenizer,\n",
         "        train_dataset=dataset, args=sft_config,\n",
         "    )\n",
         "    train_result = trainer.train()\n",
@@ -555,9 +569,7 @@
         "elapsed = time.time() - t_start\n",
         "print(f\"\\nTraining complete in {elapsed/60:.1f} min\")\n",
         "print(pd.DataFrame(training_log).to_string(index=False))"
-      ],
-      "execution_count": null,
-      "outputs": []
     },
     {
       "cell_type": "markdown",
@@ -570,7 +582,9 @@
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "# Cell 12: Run trained model\n",
         "print(\"Running TRAINED model on all tasks...\")\n",
@@ -588,9 +602,7 @@
         "print(\"AFTER TRAINING:\")\n",
         "for t in TASKS:\n",
         "    print(f\"  {t}: grader={after_results[t]['grader_score']:.4f}\")"
-      ],
-      "execution_count": null,
-      "outputs": []
     },
     {
       "cell_type": "markdown",
@@ -601,7 +613,9 @@
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "# Cell 13: Training curves\n",
         "fig, axes = plt.subplots(1, 2, figsize=(14, 5))\n",
@@ -623,13 +637,13 @@
         "fig.tight_layout()\n",
         "fig.savefig(f'{PLOTS_DIR}/reward_curve.png', dpi=150, bbox_inches='tight')\n",
         "plt.show()"
-      ],
-      "execution_count": null,
-      "outputs": []
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "# Cell 14: Before vs After\n",
         "task_labels = [t.replace('monthly_', '').title() for t in TASKS]\n",
@@ -659,13 +673,13 @@
         "fig.tight_layout()\n",
         "fig.savefig(f'{PLOTS_DIR}/before_after.png', dpi=150, bbox_inches='tight')\n",
         "plt.show()"
-      ],
-      "execution_count": null,
-      "outputs": []
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "# Cell 15: Trajectory comparison\n",
         "fig, axes = plt.subplots(2, 3, figsize=(16, 8))\n",
@@ -689,9 +703,7 @@
         "fig.tight_layout()\n",
         "fig.savefig(f'{PLOTS_DIR}/training_trajectories.png', dpi=150, bbox_inches='tight')\n",
         "plt.show()"
-      ],
-      "execution_count": null,
-      "outputs": []
     },
     {
       "cell_type": "markdown",
@@ -702,7 +714,9 @@
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "# Cell 16: Final summary\n",
         "print(\"=\" * 67)\n",
@@ -739,13 +753,13 @@
         "\n",
         "print(f\"\\nSaved to {PLOTS_DIR}/\")\n",
         "print(\"All results are from real LoRA weight updates on real environment runs.\")"
-      ],
-      "execution_count": null,
-      "outputs": []
     },
     {
       "cell_type": "code",
       "metadata": {},
       "source": [
         "# Cell 17: Save adapter\n",
         "save_path = \"./viraltest_trained_adapter\"\n",
@@ -753,24 +767,22 @@
         "tokenizer.save_pretrained(save_path)\n",
         "print(f\"LoRA adapter saved to {save_path}\")\n",
         "print(\"Load with: PeftModel.from_pretrained(base_model, save_path)\")"
-      ],
-      "execution_count": null,
-      "outputs": []
     }
   ],
   "metadata": {
     "kernelspec": {
-      "display_name": "Python 3",
       "language": "python",
       "name": "python3"
     },
     "language_info": {
       "name": "python",
-      "version": "3.10.0"
-    },
-    "accelerator": "GPU",
-    "gpuClass": "standard"
   },
   "nbformat": 4,
   "nbformat_minor": 4
-}

     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {},
+      "outputs": [],
       "source": [
         "# Cell 1: Install dependencies\n",
+        "# Version specifiers are quoted so the shell does not parse '>=' as an output redirect.\n",
         "!pip install -q torch torchvision torchaudio\n",
+        "!pip install -q \"transformers>=4.45.0\" accelerate \"peft>=0.10.0\" \"trl>=0.20.0\" datasets bitsandbytes\n",
         "!pip install -q matplotlib pandas\n",
         "!pip install -q pydantic httpx\n",
         "!pip install -q \"openenv-core[core]>=0.2.2\""
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {},
+      "outputs": [],
       "source": [
         "# Cell 2: Clone the repo and set up paths\n",
         "import os, sys\n",
         "REPO_DIR = \"/content/viral-posts-env\"\n",
+        "REPO_BRANCH = \"hack1\"\n",
         "if not os.path.exists(REPO_DIR):\n",
+        "    !git clone --branch {REPO_BRANCH} --depth 1 https://github.com/VaibhavKhandare/viral-posts-env.git {REPO_DIR}\n",
         "os.chdir(REPO_DIR)\n",
         "sys.path.insert(0, REPO_DIR)\n",
         "\n",
         "os.makedirs(PLOTS_DIR, exist_ok=True)\n",
         "print(f\"Working dir: {os.getcwd()}\")\n",
         "print(f\"Plots dir: {PLOTS_DIR}\")"
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {},
+      "outputs": [],
       "source": [
         "# Cell 3: Imports\n",
         "import json, random, time, textwrap, copy\n",
         "\n",
         "print(f\"GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'}\")\n",
         "print(f\"Tags: {len(TAG_POOL)}, Topics: {len(ALL_TOPICS)}, Horizon: {TASK_HORIZON} days\")"
+      ]
     },
     {
       "cell_type": "markdown",
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {},
+      "outputs": [],
       "source": [
         "# Cell 4: Define heuristic agents + episode runner\n",
         "_rng = random.Random(42)\n",
         "            \"rewards\": rewards, \"energies\": energies}\n",
         "\n",
         "print(\"Agents and episode runner defined.\")"
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {},
+      "outputs": [],
       "source": [
         "# Cell 5: Run baselines\n",
         "print(\"Running heuristic baselines (5 agents × 3 tasks)...\")\n",
         "for name in BASELINE_AGENTS:\n",
         "    scores = [baseline_results[name][t][\"grader_score\"] for t in TASKS]\n",
         "    print(f\"{name:<14s} {scores[0]:>10.4f} {scores[1]:>12.4f} {scores[2]:>14.4f} {sum(scores)/3:>8.4f}\")"
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {},
+      "outputs": [],
       "source": [
         "# Cell 6: Baseline plots\n",
         "fig, axes = plt.subplots(1, 3, figsize=(16, 5), sharey=True)\n",
         "fig.tight_layout()\n",
         "fig.savefig(f\"{PLOTS_DIR}/baseline_leaderboard.png\", dpi=150, bbox_inches='tight')\n",
         "plt.show()"
+      ]
     },
     {
       "cell_type": "markdown",
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {},
+      "outputs": [],
       "source": [
         "# Cell 7: Load model\n",
         "from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig\n",
         "model.eval()\n",
         "print(f\"Model loaded. Device: {model.device}\")\n",
         "print(f\"Memory: {torch.cuda.memory_allocated()/1e9:.1f} GB\")"
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {},
+      "outputs": [],
       "source": [
         "# Cell 8: LLM agent functions\n",
         "SYSTEM_PROMPT = textwrap.dedent(\"\"\"\\\n",
         "            \"burned_out\": obs.creator_energy <= 0}\n",
         "\n",
         "print(\"LLM agent functions defined.\")"
+      ]
     },
     {
       "cell_type": "markdown",
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {},
+      "outputs": [],
       "source": [
         "# Cell 9: Run untrained model\n",
         "print(\"Running UNTRAINED base model on all tasks...\")\n",
         "print(\"BEFORE TRAINING:\")\n",
         "for t in TASKS:\n",
         "    print(f\"  {t}: grader={before_results[t]['grader_score']:.4f}\")"
+      ]
     },
     {
       "cell_type": "markdown",
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {},
+      "outputs": [],
       "source": [
         "# Cell 10: Attach LoRA adapter\n",
         "from peft import LoraConfig, get_peft_model, TaskType\n",
         "model.enable_input_require_grads()\n",
         "peft_model = get_peft_model(model, lora_config)\n",
         "peft_model.print_trainable_parameters()"
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {},
+      "outputs": [],
       "source": [
         "# Cell 11: Training loop\n",
         "from trl import SFTTrainer, SFTConfig\n",
         "        warmup_steps=5,\n",
         "        logging_steps=5,\n",
         "        save_strategy=\"no\",\n",
+        "        max_length=1024,\n",
         "        fp16=True,\n",
         "        report_to=\"none\",\n",
         "    )\n",
         "\n",
         "    peft_model.train()\n",
         "    trainer = SFTTrainer(\n",
+        "        model=peft_model, processing_class=tokenizer,\n",
         "        train_dataset=dataset, args=sft_config,\n",
         "    )\n",
         "    train_result = trainer.train()\n",
         "elapsed = time.time() - t_start\n",
         "print(f\"\\nTraining complete in {elapsed/60:.1f} min\")\n",
         "print(pd.DataFrame(training_log).to_string(index=False))"
+      ]
     },
     {
       "cell_type": "markdown",
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {},
+      "outputs": [],
       "source": [
         "# Cell 12: Run trained model\n",
         "print(\"Running TRAINED model on all tasks...\")\n",
         "print(\"AFTER TRAINING:\")\n",
         "for t in TASKS:\n",
         "    print(f\"  {t}: grader={after_results[t]['grader_score']:.4f}\")"
+      ]
     },
     {
       "cell_type": "markdown",
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {},
+      "outputs": [],
       "source": [
         "# Cell 13: Training curves\n",
         "fig, axes = plt.subplots(1, 2, figsize=(14, 5))\n",
         "fig.tight_layout()\n",
         "fig.savefig(f'{PLOTS_DIR}/reward_curve.png', dpi=150, bbox_inches='tight')\n",
         "plt.show()"
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {},
+      "outputs": [],
       "source": [
         "# Cell 14: Before vs After\n",
         "task_labels = [t.replace('monthly_', '').title() for t in TASKS]\n",
         "fig.tight_layout()\n",
         "fig.savefig(f'{PLOTS_DIR}/before_after.png', dpi=150, bbox_inches='tight')\n",
         "plt.show()"
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {},
+      "outputs": [],
       "source": [
         "# Cell 15: Trajectory comparison\n",
         "fig, axes = plt.subplots(2, 3, figsize=(16, 8))\n",
         "fig.tight_layout()\n",
         "fig.savefig(f'{PLOTS_DIR}/training_trajectories.png', dpi=150, bbox_inches='tight')\n",
         "plt.show()"
+      ]
     },
     {
       "cell_type": "markdown",
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {},
+      "outputs": [],
       "source": [
         "# Cell 16: Final summary\n",
         "print(\"=\" * 67)\n",
         "\n",
         "print(f\"\\nSaved to {PLOTS_DIR}/\")\n",
         "print(\"All results are from real LoRA weight updates on real environment runs.\")"
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {},
+      "outputs": [],
       "source": [
         "# Cell 17: Save adapter\n",
         "save_path = \"./viraltest_trained_adapter\"\n",
         "tokenizer.save_pretrained(save_path)\n",
         "print(f\"LoRA adapter saved to {save_path}\")\n",
         "print(\"Load with: PeftModel.from_pretrained(base_model, save_path)\")"
+      ]
     }
   ],
   "metadata": {
+    "accelerator": "GPU",
+    "gpuClass": "standard",
     "kernelspec": {
+      "display_name": ".venv",
       "language": "python",
       "name": "python3"
     },
     "language_info": {
       "name": "python",
+      "version": "3.13.1"
+    }
   },
   "nbformat": 4,
   "nbformat_minor": 4
+}