Spaces:

crossentropy-ai
/

rlcube

Running

App Files Community

imwithye committed on Sep 21

Commit

970c966

1 Parent(s): e71fda3

test

Browse files

Files changed (4) hide show

rlcube/cube2.ipynb +76 -39
rlcube/rlcube/models/models.py +1 -1
src/components/ui-controls.tsx +1 -1
src/contexts/control-context.tsx +2 -2

rlcube/cube2.ipynb CHANGED Viewed

@@ -49,7 +49,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
    "id": "defde44e",
    "metadata": {},
    "outputs": [
@@ -57,17 +57,43 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[2, 3, 7, 6, 8, 6, 3, 2, 2, 5]\n",
-      "tensor([[ 1.1924],\n",
-      "        [ 0.0826],\n",
-      "        [ 1.0202],\n",
-      "        [ 0.0826],\n",
-      "        [ 1.1121],\n",
-      "        [-0.0302],\n",
-      "        [-1.5963],\n",
-      "        [-0.0302],\n",
-      "        [-1.3707],\n",
-      "        [-2.4068]], grad_fn=<AddmmBackward0>)\n"
      ]
     }
    ],
@@ -87,44 +113,55 @@
     "obs = torch.tensor(np.array(obs), dtype=torch.float32)\n",
     "values, policies = net(obs)\n",
     "print(actions)\n",
-    "print(values)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
-   "id": "cae20b12",
    "metadata": {},
    "outputs": [
     {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 14%|█▍        | 43/300 [00:00<00:02, 127.98it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[4, 3, 7, 11]\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
     }
    ],
    "source": [
-    "from rlcube.models.search import MonteCarloTree\n",
-    "\n",
-    "tree = MonteCarloTree(env.obs(), max_simulations=300)\n",
-    "if tree.is_solved:\n",
-    "    print([action for _, action in tree.solved_path])"
    ]
   }
  ],
  "metadata": {

   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "defde44e",
    "metadata": {},
    "outputs": [
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "[11, 11, 3, 10, 9, 4, 5, 3, 11, 11]\n",
+      "tensor([[ 1.2608],\n",
+      "        [ 0.2146],\n",
+      "        [-0.8424],\n",
+      "        [-0.6595],\n",
+      "        [-0.4404],\n",
+      "        [-1.2381],\n",
+      "        [-0.4404],\n",
+      "        [-1.6949],\n",
+      "        [-3.1237],\n",
+      "        [-2.8188]], grad_fn=<AddmmBackward0>)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "  9%|▉         | 469/5000 [00:04<00:48, 94.14it/s] \n"
+     ]
+    },
+    {
+     "ename": "KeyboardInterrupt",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
+      "\u001b[31mKeyboardInterrupt\u001b[39m                         Traceback (most recent call last)",
+      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[2]\u001b[39m\u001b[32m, line 20\u001b[39m\n\u001b[32m     16\u001b[39m \u001b[38;5;28mprint\u001b[39m(values)\n\u001b[32m     18\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mrlcube\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mmodels\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01msearch\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m MonteCarloTree\n\u001b[32m---> \u001b[39m\u001b[32m20\u001b[39m tree = \u001b[43mMonteCarloTree\u001b[49m\u001b[43m(\u001b[49m\u001b[43menv\u001b[49m\u001b[43m.\u001b[49m\u001b[43mobs\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmax_simulations\u001b[49m\u001b[43m=\u001b[49m\u001b[32;43m5000\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m     21\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m tree.is_solved:\n\u001b[32m     22\u001b[39m     \u001b[38;5;28mprint\u001b[39m([action \u001b[38;5;28;01mfor\u001b[39;00m _, action \u001b[38;5;129;01min\u001b[39;00m tree.solved_path])\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/Workspace/imwithye/rlcube/rlcube/rlcube/models/search.py:59\u001b[39m, in \u001b[36mMonteCarloTree.__init__\u001b[39m\u001b[34m(self, obs, max_simulations)\u001b[39m\n\u001b[32m     57\u001b[39m \u001b[38;5;28mself\u001b[39m.is_solved = \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[32m     58\u001b[39m \u001b[38;5;28mself\u001b[39m.solved_path = []\n\u001b[32m---> \u001b[39m\u001b[32m59\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_build\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/Workspace/imwithye/rlcube/rlcube/rlcube/models/search.py:80\u001b[39m, in \u001b[36mMonteCarloTree._build\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m     78\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[32m12\u001b[39m):\n\u001b[32m     79\u001b[39m     obs = adjacent_obs[i]\n\u001b[32m---> \u001b[39m\u001b[32m80\u001b[39m     child = \u001b[43mNode\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnode\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m     81\u001b[39m     node.children[i] = child\n\u001b[32m     82\u001b[39m     \u001b[38;5;28mself\u001b[39m.nodes.append(child)\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/Workspace/imwithye/rlcube/rlcube/rlcube/models/search.py:21\u001b[39m, in \u001b[36mNode.__init__\u001b[39m\u001b[34m(self, obs, parent)\u001b[39m\n\u001b[32m     18\u001b[39m value = value.detach()\n\u001b[32m     19\u001b[39m policy = torch.softmax(policy.detach(), dim=\u001b[32m1\u001b[39m)\n\u001b[32m---> \u001b[39m\u001b[32m21\u001b[39m \u001b[38;5;28mself\u001b[39m.is_solved = \u001b[43mCube2Env\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfrom_obs\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobs\u001b[49m\u001b[43m)\u001b[49m.is_solved()\n\u001b[32m     22\u001b[39m \u001b[38;5;28mself\u001b[39m.value = torch.tensor(\u001b[32m1\u001b[39m) \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.is_solved \u001b[38;5;28;01melse\u001b[39;00m value.view(-\u001b[32m1\u001b[39m)\n\u001b[32m     23\u001b[39m \u001b[38;5;28mself\u001b[39m.policy = policy.view(-\u001b[32m1\u001b[39m)\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/Workspace/imwithye/rlcube/rlcube/rlcube/envs/cube2.py:30\u001b[39m, in \u001b[36mCube2Env.from_obs\u001b[39m\u001b[34m(obs)\u001b[39m\n\u001b[32m     28\u001b[39m         idx = i * \u001b[32m4\u001b[39m + j\n\u001b[32m     29\u001b[39m         state[i, j] = np.argmax(obs[idx])\n\u001b[32m---> \u001b[39m\u001b[32m30\u001b[39m env = \u001b[43mCube2Env\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m     31\u001b[39m env.reset(state=state)\n\u001b[32m     32\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m env\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/Workspace/imwithye/rlcube/rlcube/rlcube/envs/cube2.py:16\u001b[39m, in \u001b[36mCube2Env.__init__\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m     14\u001b[39m \u001b[38;5;28msuper\u001b[39m(Cube2Env, \u001b[38;5;28mself\u001b[39m).\u001b[34m__init__\u001b[39m()\n\u001b[32m     15\u001b[39m \u001b[38;5;28mself\u001b[39m.action_space = gym.spaces.Discrete(\u001b[32m12\u001b[39m)\n\u001b[32m---> \u001b[39m\u001b[32m16\u001b[39m \u001b[38;5;28mself\u001b[39m.observation_space = \u001b[43mgym\u001b[49m\u001b[43m.\u001b[49m\u001b[43mspaces\u001b[49m\u001b[43m.\u001b[49m\u001b[43mBox\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m     17\u001b[39m \u001b[43m    \u001b[49m\u001b[43mlow\u001b[49m\u001b[43m=\u001b[49m\u001b[32;43m0\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhigh\u001b[49m\u001b[43m=\u001b[49m\u001b[32;43m1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mshape\u001b[49m\u001b[43m=\u001b[49m\u001b[43m(\u001b[49m\u001b[32;43m24\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[32;43m6\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m=\u001b[49m\u001b[43mnp\u001b[49m\u001b[43m.\u001b[49m\u001b[43mint8\u001b[49m\n\u001b[32m     18\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m     19\u001b[39m \u001b[38;5;28mself\u001b[39m.state = np.zeros((\u001b[32m6\u001b[39m, \u001b[32m4\u001b[39m), dtype=np.int8)\n\u001b[32m     20\u001b[39m \u001b[38;5;28mself\u001b[39m.reset()\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/Workspace/imwithye/rlcube/rlcube/.venv/lib/python3.12/site-packages/gymnasium/spaces/box.py:149\u001b[39m, in \u001b[36mBox.__init__\u001b[39m\u001b[34m(self, low, high, shape, dtype, seed)\u001b[39m\n\u001b[32m    147\u001b[39m \u001b[38;5;66;03m# Cast `low` and `high` to ndarray for the dtype min and max for out of range tests\u001b[39;00m\n\u001b[32m    148\u001b[39m \u001b[38;5;28mself\u001b[39m.low, \u001b[38;5;28mself\u001b[39m.bounded_below = \u001b[38;5;28mself\u001b[39m._cast_low(low, dtype_min)\n\u001b[32m--> \u001b[39m\u001b[32m149\u001b[39m \u001b[38;5;28mself\u001b[39m.high, \u001b[38;5;28mself\u001b[39m.bounded_above = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_cast_high\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhigh\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype_max\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    151\u001b[39m \u001b[38;5;66;03m# recheck shape for case where shape and (low or high) are provided\u001b[39;00m\n\u001b[32m    152\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.low.shape != shape:\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/Workspace/imwithye/rlcube/rlcube/.venv/lib/python3.12/site-packages/gymnasium/spaces/box.py:251\u001b[39m, in \u001b[36mBox._cast_high\u001b[39m\u001b[34m(self, high, dtype_max)\u001b[39m\n\u001b[32m    241\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m_cast_high\u001b[39m(\u001b[38;5;28mself\u001b[39m, high, dtype_max) -> \u001b[38;5;28mtuple\u001b[39m[np.ndarray, np.ndarray]:\n\u001b[32m    242\u001b[39m \u001b[38;5;250m    \u001b[39m\u001b[33;03m\"\"\"Casts the input Box high value to ndarray with provided dtype.\u001b[39;00m\n\u001b[32m    243\u001b[39m \n\u001b[32m    244\u001b[39m \u001b[33;03m    Args:\u001b[39;00m\n\u001b[32m   (...)\u001b[39m\u001b[32m    249\u001b[39m \u001b[33;03m        The updated high value and for what values the input is bounded (above)\u001b[39;00m\n\u001b[32m    250\u001b[39m \u001b[33;03m    \"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m251\u001b[39m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[43mis_float_integer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhigh\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[32m    252\u001b[39m         bounded_above = np.full(\u001b[38;5;28mself\u001b[39m.shape, high, dtype=\u001b[38;5;28mfloat\u001b[39m) < np.inf\n\u001b[32m    254\u001b[39m         \u001b[38;5;28;01mif\u001b[39;00m np.isnan(high):\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/Workspace/imwithye/rlcube/rlcube/.venv/lib/python3.12/site-packages/gymnasium/spaces/box.py:32\u001b[39m, in \u001b[36mis_float_integer\u001b[39m\u001b[34m(var)\u001b[39m\n\u001b[32m     28\u001b[39m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mstr\u001b[39m(np.min(arr))\n\u001b[32m     29\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mstr\u001b[39m(arr)\n\u001b[32m---> \u001b[39m\u001b[32m32\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mis_float_integer\u001b[39m(var: Any) -> \u001b[38;5;28mbool\u001b[39m:\n\u001b[32m     33\u001b[39m \u001b[38;5;250m    \u001b[39m\u001b[33;03m\"\"\"Checks if a scalar variable is an integer or float (does not include bool).\"\"\"\u001b[39;00m\n\u001b[32m     34\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m np.issubdtype(\u001b[38;5;28mtype\u001b[39m(var), np.integer) \u001b[38;5;129;01mor\u001b[39;00m np.issubdtype(\u001b[38;5;28mtype\u001b[39m(var), np.floating)\n",
+      "\u001b[31mKeyboardInterrupt\u001b[39m: "
      ]
     }
    ],
     "obs = torch.tensor(np.array(obs), dtype=torch.float32)\n",
     "values, policies = net(obs)\n",
     "print(actions)\n",
+    "print(values)\n",
+    "\n",
+    "from rlcube.models.search import MonteCarloTree\n",
+    "\n",
+    "tree = MonteCarloTree(env.obs(), max_simulations=1000)\n",
+    "if tree.is_solved:\n",
+    "    print([action for _, action in tree.solved_path])"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 6,
+   "id": "a91732d7",
    "metadata": {},
    "outputs": [
     {
+     "data": {
+      "text/plain": [
+       "defaultdict(<function rlcube.models.search.Node.__init__.<locals>.<lambda>()>,\n",
+       "            {0: 400,\n",
+       "             1: 0,\n",
+       "             2: 0,\n",
+       "             3: 0,\n",
+       "             4: 0,\n",
+       "             5: 0,\n",
+       "             6: 0,\n",
+       "             7: 0,\n",
+       "             8: 0,\n",
+       "             9: 0,\n",
+       "             10: 44,\n",
+       "             11: 0})"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
+    "tree.root.N"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "99d79934",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {

rlcube/rlcube/models/models.py CHANGED Viewed

@@ -76,7 +76,7 @@ class DNN(nn.Module):
         torch.save(self.state_dict(), filepath)
     def load(self, filepath: str):
-        self.load_state_dict(torch.load(filepath))
 class DNN2(nn.Module):

         torch.save(self.state_dict(), filepath)
     def load(self, filepath: str):
+        self.load_state_dict(torch.load(filepath, map_location=torch.device("cpu")))
 class DNN2(nn.Module):

src/components/ui-controls.tsx CHANGED Viewed

@@ -27,7 +27,7 @@ export const UIControls = () => {
   } = useControlContext();
   const scramble = () => {
-    const scrambleSteps = Array.from({ length: 20 }, () => Actions[Math.floor(Math.random() * Actions.length)]);
     rubiksCubeRef?.current?.rotate(scrambleSteps);
   };

   } = useControlContext();
   const scramble = () => {
+    const scrambleSteps = Array.from({ length: 5 }, () => Actions[Math.floor(Math.random() * Actions.length)]);
     rubiksCubeRef?.current?.rotate(scrambleSteps);
   };

src/contexts/control-context.tsx CHANGED Viewed

@@ -23,7 +23,7 @@ export const ControlContext = createContext<ControlContextType>({
   setShowRotationIndicators: () => {},
   cubeRoughness: 0.5,
   setCubeRoughness: () => {},
-  cubeSpeed: 2,
   setCubeSpeed: () => {},
   background: 'sunset',
   setBackground: () => {},
@@ -38,7 +38,7 @@ export const useControlContext = () => {
 export const ControlProvider = ({ children }: { children: React.ReactNode }) => {
   const [showRotationIndicators, setShowRotationIndicators] = useState(false);
   const [cubeRoughness, setCubeRoughness] = useState(0.5);
-  const [cubeSpeed, setCubeSpeed] = useState(2);
   const [background, setBackground] = useState<PresetsType>('sunset');
   const [rubiksCubeRef, setRubiksCubeRef] = useState<RefObject<RubiksCubeRef | null> | undefined>(undefined);

   setShowRotationIndicators: () => {},
   cubeRoughness: 0.5,
   setCubeRoughness: () => {},
+  cubeSpeed: 8,
   setCubeSpeed: () => {},
   background: 'sunset',
   setBackground: () => {},
 export const ControlProvider = ({ children }: { children: React.ReactNode }) => {
   const [showRotationIndicators, setShowRotationIndicators] = useState(false);
   const [cubeRoughness, setCubeRoughness] = useState(0.5);
+  const [cubeSpeed, setCubeSpeed] = useState(8);
   const [background, setBackground] = useState<PresetsType>('sunset');
   const [rubiksCubeRef, setRubiksCubeRef] = useState<RefObject<RubiksCubeRef | null> | undefined>(undefined);