xhiroga committed on
Commit
c0fe349
1 Parent(s): f8bd42c

Upload folder using huggingface_hub

app.py CHANGED
@@ -32,6 +32,7 @@ def classify_image(input_image: Image):
 
     # Forward pass the input through the model
     output = model(input_tensor)
+    print(output)
 
     probabilities = torch.nn.functional.softmax(output, dim=1)
 
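The one-line change above prints the raw logits the model returns before they are normalized. For context, a minimal sketch of the logits-to-probabilities step that `classify_image` performs; the stand-in linear model and tensor shapes here are illustrative, not taken from app.py:

```python
import torch

# Hypothetical stand-in for the classifier loaded in app.py.
model = torch.nn.Linear(8, 3)
model.eval()

input_tensor = torch.randn(1, 8)  # one preprocessed sample

with torch.no_grad():
    output = model(input_tensor)
    print(output)  # raw logits: unbounded scores, one per class

    # softmax over dim=1 (the class dimension) rescales the logits
    # into probabilities that sum to 1, as the following line of app.py does.
    probabilities = torch.nn.functional.softmax(output, dim=1)
    print(probabilities)
```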
models/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3342f4ace31995d4255d3b4a7017955fa83014ed9c9304b1aaa4f333a3c54271
+oid sha256:ee6f0b0f6d957c868e1fb383c627d0de3095f5fac670c084e81cb81b29b43b73
 size 1074051192
notebooks/crop.ipynb ADDED
@@ -0,0 +1,221 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Red channel:\n",
+      "tensor([[0.0000],\n",
+      "        [0.9294]])\n",
+      "Green channel:\n",
+      "tensor([[0.0000],\n",
+      "        [0.1098]])\n",
+      "Blue channel:\n",
+      "tensor([[0.0000],\n",
+      "        [0.1412]])\n",
+      "Alpha channel:\n",
+      "tensor([[0.],\n",
+      "        [1.]])\n"
+     ]
+    },
+    {
+     "data": {
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAMwAAAGFCAYAAACxAhziAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAEO0lEQVR4nO3VsQ3CUBAFQYyowDkh/RdESE4LRwl4A+vL0kx8wUtWt83M3IBD7qsHwJUIBgLBQCAYCAQDgWAgEAwEgoFAMBAIBgLBQCAYCAQDgWAgEAwEgoFAMBAIBgLBQCAYCAQDgWAgEAwEgoFAMBAIBgLBQCAYCAQDgWAgEAwEgoFAMBAIBgLBQCAYCAQDgWAgEAwEgoFAMBAIBgLBQCAYCAQDgWAgEAwEgoFAMBAIBgLBQCAYCAQDgWAgEAwEgoFAMBAIBgLBQCAYCAQDgWAgEAwEgoFAMBAIBgLBQCAYCAQDgWAgEAwEgoFAMBAIBgLBQCAYCAQDgWAgEAwEgoFAMBAIBgLBQCAYCAQDgWAgEAwEgoFAMBAIBgLBQCAYCAQDgWAgEAwEgoFAMBAIBgLBQCAYCAQDgWAgEAwEgoFAMBAIBgLBQCAYCAQDgWAgEAwEgoFAMBAIBgLBQCAYCAQDgWAgEAwEgoFAMBAIBgLBQCAYCAQDgWAgEAwEgoFAMBAIBgLBQCAYCAQDgWAgEAwEgoFAMBAIBgLBQCAYCAQDgWAgEAwEgoFAMBAIBgLBQCAYCAQDgWAgEAwEgoFAMBAIBgLBQCAYCAQDgWAgEAwEgoFAMBAIBgLBQCAYCAQDgWAgEAwEgoFAMBAIBgLBQCAYCAQDgWAgEAwEgoFAMBAIBgLBQCAYCAQDgWAgEAwEgoFAMBA8jh5+n68zd8By++f998aHgUAwEAgGAsFAIBgIBAOBYCAQDASCgUAwEAgGAsFAIBgIBAOBYCAQDASCgUAwEAgGAsFAIBgIBAOBYCAQDASCgUAwEAgGAsFAIBgIBAOBYCAQDASCgUAwEAgGAsFAIBgIBAOBYCAQDASCgUAwEAgGAsFAIBgIBAOBYCAQDASCgUAwEAgGAsFAIBgIBAOBYCAQDASCgUAwEAgGAsFAIBgIBAOBYCAQDASCgUAwEAgGAsFAIBgIBAOBYCAQDASCgUAwEAgGAsFAIBgIBAOBYCAQDASCgUAwEAgGAsFAIBgIBAOBYCAQDASCgUAwEAgGAsFAIBgIBAOBYCAQDASCgUAwEAgGAsFAIBgIBAOBYCAQDASCgUAwEAgGAsFAIBgIBAOBYCAQDASCgUAwEAgGAsFAIBgIBAOBYCAQDASCgUAwEAgGAsFAIBgIBAOBYCAQDASCgUAwEAgGAsFAIBgIBAOBYCAQDASCgUAwEAgGAsFAIBgIBAOBYCAQDASCgUAwEAgGAsFAIBgIBAOBYCAQDASCgUAwEAgGAsFAIBgIBAOBYCAQDASCgUAwEAgGAsFAIBgIBAOBYCAQDATbzMzqEXAVPgwEgoFAMBAIBgLBQCAYCAQDgWAgEAwEP8rSDgOaVu6AAAAAAElFTkSuQmCC",
+      "text/plain": [
+       "<Figure size 640x480 with 1 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "from PIL import Image\n",
+    "from torchvision import transforms\n",
+    "\n",
+    "\n",
+    "def show_rgba(image_path):\n",
+    "    # See RGBA data\n",
+    "    image = Image.open(image_path)\n",
+    "    to_tensor = transforms.ToTensor()\n",
+    "    tensor = to_tensor(image)\n",
+    "\n",
+    "    for i, color in enumerate(['Red', 'Green', 'Blue', 'Alpha']):\n",
+    "        print(f\"{color} channel:\")\n",
+    "        print(tensor[i])\n",
+    "    plt.imshow(tensor.permute(1, 2, 0))\n",
+    "    plt.axis('off')\n",
+    "    plt.show()\n",
+    "\n",
+    "\n",
+    "show_rgba('../data/samples/transparent_indonesia_flag.png')\n",
+    "# Alpha channel: tensor([[0.], [1.]]), i.e. an alpha of 0 is fully transparent.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import cv2\n",
+    "import numpy as np\n",
+    "import os\n",
+    "\n",
+    "from PIL import Image\n",
+    "\n",
+    "\n",
+    "def get_object_bounding_boxes(image):\n",
+    "    # Get the alpha channel and build a binary mask from it\n",
+    "    alpha_channel = image[:, :, 3]\n",
+    "\n",
+    "    # cv2.threshold maps pixels whose alpha value is 1 or greater to 255 (white) and all others to 0 (black).\n",
+    "    # This yields a binary mask with the object in white and the background in black.\n",
+    "    _, binary_mask = cv2.threshold(alpha_channel, 1, 255, cv2.THRESH_BINARY)\n",
+    "\n",
+    "    # Detect contours\n",
+    "    contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
+    "\n",
+    "    return contours or []\n",
+    "\n",
+    "\n",
+    "def show_bounding_boxes(image_path):\n",
+    "    # Load the transparent-background image in RGBA format\n",
+    "    image_pil = Image.open(image_path)\n",
+    "\n",
+    "    # Convert the PIL image to OpenCV format\n",
+    "    image = np.array(image_pil)\n",
+    "\n",
+    "    # Get the bounding boxes\n",
+    "    contours = get_object_bounding_boxes(image)\n",
+    "    rects = [cv2.boundingRect(c) for c in contours]\n",
+    "\n",
+    "    # Draw the bounding boxes\n",
+    "    image_bgr = cv2.cvtColor(image[:, :, :3], cv2.COLOR_RGB2BGR)\n",
+    "    [\n",
+    "        cv2.rectangle(image_bgr, (x, y), (x + w, y + h), (0, 255, 0), 2)\n",
+    "        for [x, y, w, h] in rects\n",
+    "    ]\n",
+    "\n",
+    "    # Display the image with the bounding boxes applied\n",
+    "    cv2.imshow('Bounding Box', image_bgr)\n",
+    "    cv2.waitKey(0)\n",
+    "    cv2.destroyAllWindows()\n",
+    "\n",
+    "\n",
+    "show_bounding_boxes('../data/nobg/ポケットモンスターシールド/2020022922273500_s.png')\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[]\n"
+     ]
+    }
+   ],
+   "source": [
+    "file_path = '../data/nobg/every-pal-in-palworld-a-complete-paldeck-list/016 Palworld Teafant.png.png'\n",
+    "\n",
+    "image_pil = Image.open(file_path)\n",
+    "image = np.array(image_pil)\n",
+    "\n",
+    "# Get the bounding boxes of the objects in the image\n",
+    "contours = get_object_bounding_boxes(image)\n",
+    "print(contours)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "input_dir = \"../data/nobg\"\n",
+    "output_dir = \"../data/cropped\"\n",
+    "\n",
+    "def get_max_bounding_rect(contours):\n",
+    "    if len(contours) == 0:\n",
+    "        return 0, 0, 0, 0\n",
+    "\n",
+    "    c = max(contours, key=cv2.contourArea)\n",
+    "    x, y, w, h = cv2.boundingRect(c)\n",
+    "    return x, y, w, h\n",
+    "\n",
+    "# Loop over all files and subdirectories in the input directory\n",
+    "for root, dirs, files in os.walk(input_dir):\n",
+    "    for filename in files:\n",
+    "        # Construct full file path\n",
+    "        file_path = os.path.join(root, filename)\n",
+    "\n",
+    "        # Open the image and convert it to a numpy array\n",
+    "        image_pil = Image.open(file_path)\n",
+    "        image = np.array(image_pil)\n",
+    "\n",
+    "        # Get the bounding boxes of the objects in the image\n",
+    "        contours = get_object_bounding_boxes(image)\n",
+    "\n",
+    "        if len(contours) == 0:\n",
+    "            continue\n",
+    "\n",
+    "        # Get the maximum bounding rectangle\n",
+    "        x, y, w, h = get_max_bounding_rect(contours)\n",
+    "\n",
+    "        # Crop the image\n",
+    "        cropped_image = image[y:y+h, x:x+w]\n",
+    "\n",
+    "        cropped_image_pil = Image.fromarray(cropped_image)\n",
+    "\n",
+    "        # Create output subdirectory if it doesn't exist\n",
+    "        output_subdir = os.path.join(output_dir, os.path.relpath(root, input_dir))\n",
+    "        os.makedirs(output_subdir, exist_ok=True)\n",
+    "\n",
+    "        # Save the cropped image to the output directory\n",
+    "        output_file_path = os.path.join(output_subdir, filename)\n",
+    "        cropped_image_pil.save(output_file_path)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "pokemon-pal",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
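Condensed, the recipe in crop.ipynb is: threshold the alpha channel into a binary mask, find external contours, take the bounding rectangle of the largest contour, and slice the array. A self-contained sketch of that pipeline; the function name is mine, but it mirrors the notebook's get_object_bounding_boxes and get_max_bounding_rect:

```python
import cv2
import numpy as np
from PIL import Image


def crop_to_largest_object(image_path):
    """Crop an RGBA image to the bounding box of its largest opaque region."""
    image = np.array(Image.open(image_path))  # H x W x 4 array

    # Foreground = any pixel with alpha >= 1, matching the notebook's threshold.
    _, mask = cv2.threshold(image[:, :, 3], 1, 255, cv2.THRESH_BINARY)

    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return Image.fromarray(image)  # fully transparent image: return unchanged

    # Largest contour by area, then its axis-aligned bounding rectangle.
    x, y, w, h = cv2.boundingRect(max(contours, key=cv2.contourArea))
    return Image.fromarray(image[y:y + h, x:x + w])
```

The empty-contours case matters in practice: the Teafant example above prints [], and the batch loop skips such files with `continue` rather than writing a zero-size crop.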
notebooks/nobg.ipynb ADDED
@@ -0,0 +1,94 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\hiroga\\miniconda3\\envs\\pokemon-pal\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n",
+      "c:\\Users\\hiroga\\miniconda3\\envs\\pokemon-pal\\Lib\\site-packages\\torchvision\\transforms\\functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True).\n",
+      "  warnings.warn(\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "from carvekit.api.high import HiInterface\n",
+    "\n",
+    "# Check doc strings for more information\n",
+    "interface = HiInterface(object_type=\"object\",  # Can be \"object\" or \"hairs-like\".\n",
+    "                        batch_size_seg=5,\n",
+    "                        batch_size_matting=1,\n",
+    "                        device='cuda' if torch.cuda.is_available() else 'cpu',\n",
+    "                        seg_mask_size=640,  # Use 640 for Tracer B7 and 320 for U2Net\n",
+    "                        matting_mask_size=2048,\n",
+    "                        trimap_prob_threshold=231,\n",
+    "                        trimap_dilation=30,\n",
+    "                        trimap_erosion_iters=5,\n",
+    "                        fp16=False)\n",
+    "import os\n",
+    "\n",
+    "# input_dir = \"../data/raw\"\n",
+    "# output_dir = \"../data/nobg\"\n",
+    "input_dir = \"../data/raw/ポケットモンスターシールド\"\n",
+    "output_dir = \"../data/nobg/ポケットモンスターシールド\"\n",
+    "\n",
+    "# Create output directory if it doesn't exist\n",
+    "os.makedirs(output_dir, exist_ok=True)\n",
+    "\n",
+    "# Loop over all files and subdirectories in the input directory\n",
+    "for root, dirs, files in os.walk(input_dir):\n",
+    "    for filename in files:\n",
+    "        # Construct full file path\n",
+    "        file_path = os.path.join(root, filename)\n",
+    "\n",
+    "        # Process the image and remove the background\n",
+    "        images_without_background = interface([file_path])\n",
+    "        image_wo_bg = images_without_background[0]\n",
+    "\n",
+    "        # Create output subdirectory if it doesn't exist\n",
+    "        output_subdir = os.path.join(output_dir, os.path.relpath(root, input_dir))\n",
+    "        os.makedirs(output_subdir, exist_ok=True)\n",
+    "\n",
+    "        # Save the processed image to the output directory\n",
+    "        # Since the image format is RGBA, we save it as PNG\n",
+    "        filename = os.path.splitext(filename)[0] + \".png\"\n",
+    "        output_file_path = os.path.join(output_subdir, filename)\n",
+    "        image_wo_bg.save(output_file_path)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "pokemon-pal",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
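nobg.ipynb calls `interface([file_path])` once per file, so the `batch_size_seg=5` setting configured above never sees more than one image at a time. Since HiInterface accepts a list of paths and returns one image per input, collecting the paths first and passing them in a single call should let carvekit batch the segmentation pass. A sketch, assuming the `interface`, `input_dir`, and `output_dir` defined in the notebook, and assuming outputs preserve input order as the carvekit usage example suggests:

```python
import os

# Gather every file under input_dir up front.
paths = [
    os.path.join(root, name)
    for root, _dirs, files in os.walk(input_dir)
    for name in files
]

# One call over all paths, instead of one call per file.
images_without_background = interface(paths)

for src, image_wo_bg in zip(paths, images_without_background):
    # Mirror the input directory layout and force a .png extension,
    # since the background-removed images are RGBA.
    rel = os.path.relpath(src, input_dir)
    out_path = os.path.join(output_dir, os.path.splitext(rel)[0] + ".png")
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    image_wo_bg.save(out_path)
```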
notebooks/train.ipynb CHANGED
The diff for this file is too large to render. See raw diff