{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "624c83c1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DNN(\n",
       "  (fc_in): Linear(in_features=144, out_features=512, bias=True)\n",
       "  (residual_blocks): ModuleList(\n",
       "    (0-3): 4 x ResidualBlock(\n",
       "      (ln1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n",
       "      (fc1): Linear(in_features=512, out_features=1024, bias=True)\n",
       "      (ln2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)\n",
       "      (fc2): Linear(in_features=1024, out_features=512, bias=True)\n",
       "    )\n",
       "  )\n",
       "  (fc_value): Sequential(\n",
       "    (0): Linear(in_features=512, out_features=64, bias=True)\n",
       "    (1): ReLU()\n",
       "    (2): Linear(in_features=64, out_features=1, bias=True)\n",
       "  )\n",
       "  (fc_policy): Sequential(\n",
       "    (0): Linear(in_features=512, out_features=64, bias=True)\n",
       "    (1): ReLU()\n",
       "    (2): Linear(in_features=64, out_features=12, bias=True)\n",
       "  )\n",
       ")"
      ]
     },
     "execution_count": 101,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from rlcube.models.models import DNN\n",
    "from rlcube.envs.cube2 import Cube2Env\n",
    "import torch\n",
    "\n",
    "# Checkpoint file handed to DNN.load(); kept as a named constant so it is easy to swap.\n",
    "CHECKPOINT_PATH = \"checkpoints/checkpoint_final.pth\"\n",
    "\n",
    "net = DNN()\n",
    "# load() is project-defined; presumably it restores the saved weights -- verify in rlcube.models.\n",
    "net.load(CHECKPOINT_PATH)\n",
    "# Put the module in evaluation mode. As the last expression of the cell, the\n",
    "# returned module is also rendered as the cell output (layer summary).\n",
    "net.eval()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "id": "defde44e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[7, 11, 6, 7, 7, 10, 1, 0, 3, 3]\n",
      "tensor([[ 0.9634],\n",
      "        [-0.0930],\n",
      "        [-0.8327],\n",
      "        [-0.0930],\n",
      "        [-0.8955],\n",
      "        [-1.8250],\n",
      "        [-4.0525],\n",
      "        [-1.8250],\n",
      "        [-3.0264],\n",
      "        [-3.6782]], grad_fn=<AddmmBackward0>)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  1%|          | 8/1000 [00:00<00:10, 99.11it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0, 2, 5, 2, 8, 6]\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "from rlcube.models.search import MonteCarloTree\n",
    "\n",
    "SCRAMBLE_LENGTH = 10    # number of random moves applied to the cube\n",
    "MAX_SIMULATIONS = 1000  # simulation budget passed to MonteCarloTree\n",
    "\n",
    "# Apply random moves to a newly constructed environment, recording every move\n",
    "# and the observation it produces.\n",
    "# NOTE(review): assumes Cube2Env() starts from a well-defined initial state\n",
    "# without an explicit reset -- confirm against the env implementation.\n",
    "env = Cube2Env()\n",
    "\n",
    "actions = []\n",
    "obs_history = []\n",
    "for _ in range(SCRAMBLE_LENGTH):\n",
    "    action = env.action_space.sample()\n",
    "    actions.append(action.item())\n",
    "    env.step(action)\n",
    "    obs_history.append(env.obs())\n",
    "\n",
    "# Stack the per-step observations into one float32 batch for the network.\n",
    "obs = torch.tensor(np.array(obs_history), dtype=torch.float32)\n",
    "\n",
    "# Inference only: disable autograd so the outputs carry no grad_fn and no\n",
    "# computation graph is kept alive in the kernel.\n",
    "with torch.no_grad():\n",
    "    values, policies = net(obs)\n",
    "print(actions)\n",
    "print(values)\n",
    "\n",
    "\n",
    "# Search for a solution starting from the final scrambled observation.\n",
    "# is_solved / solved_path are read right after construction, so the search\n",
    "# presumably runs inside the constructor -- verify in rlcube.models.search.\n",
    "tree = MonteCarloTree(env.obs(), max_simulations=MAX_SIMULATIONS)\n",
    "if tree.is_solved:\n",
    "    # solved_path yields pairs; only the second element (the action) is shown.\n",
    "    print([move for _, move in tree.solved_path])\n",
    "else:\n",
    "    # Make a failed search visible instead of silently printing nothing.\n",
    "    print(f\"No solution found with max_simulations={MAX_SIMULATIONS}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}