{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "midas-depth-title",
   "metadata": {},
   "source": [
    "# Batch depth-map generation with MiDaS\n",
    "\n",
    "Loads the MiDaS monocular depth-estimation model from PyTorch Hub, letterboxes every image of a folder onto a square black canvas, runs the model on it and saves one `*_depth.png` per input image."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "85d74fc2-25df-4abb-986a-b0ec3c8949b6",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import matplotlib.cm as cm\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import torch\n",
    "import torchvision.transforms as transforms\n",
    "from PIL import Image\n",
    "\n",
    "# Load the MiDaS depth-estimation model from PyTorch Hub.\n",
    "model = torch.hub.load('intel-isl/MiDaS', 'MiDaS')\n",
    "# The model is only used for inference, so switch it to evaluation mode.\n",
    "model.eval()\n",
    "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
    "print(torch.cuda.is_available())\n",
    "model = model.to(device)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "midas-depth-config-md",
   "metadata": {},
   "source": [
    "## Configuration\n",
    "\n",
    "Everything a reader might want to tune lives in the next cell."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "midas-depth-config",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Built with os.path.join so the same notebook runs on Windows and POSIX.\n",
    "INPUT_DIR = os.path.join('928', 'hairdryer', '928-2')\n",
    "OUTPUT_DIR = os.path.join('928', 'hairdryer', '928-2-dp')\n",
    "# Side length, in pixels, of the square canvas that is fed to the model.\n",
    "MAX_SIDE = 960"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "midas-depth-helpers-md",
   "metadata": {},
   "source": [
    "## Depth-estimation helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "2432caaa-8d6d-4158-b009-9d6ad4b79c90",
   "metadata": {},
   "outputs": [],
   "source": [
    "def _letterbox(image, side):\n",
    "    \"\"\"Return a `side` x `side` black RGB canvas with `image` centred on it.\n",
    "\n",
    "    The image is resized (bilinear) so that its longest edge equals `side`\n",
    "    while keeping the aspect ratio; the remaining area stays black.\n",
    "    \"\"\"\n",
    "    width, height = image.size\n",
    "    if width > height:\n",
    "        scale = side / float(width)\n",
    "        # max(1, ...) keeps extremely elongated images from collapsing to 0 px.\n",
    "        new_size = (side, max(1, int(float(height) * scale)))\n",
    "    else:\n",
    "        scale = side / float(height)\n",
    "        new_size = (max(1, int(float(width) * scale)), side)\n",
    "    resized = image.resize(new_size, Image.Resampling.BILINEAR)\n",
    "\n",
    "    canvas = Image.new('RGB', (side, side), (0, 0, 0))\n",
    "    offset = ((side - new_size[0]) // 2, (side - new_size[1]) // 2)\n",
    "    canvas.paste(resized, offset)\n",
    "    return canvas\n",
    "\n",
    "\n",
    "def _scale_to_uint8(depth):\n",
    "    \"\"\"Min-max scale a numeric array to 0-255 and cast it to uint8.\"\"\"\n",
    "    low = depth.min()\n",
    "    span = depth.max() - low\n",
    "    if span == 0:\n",
    "        # A constant map has no contrast; return zeros instead of dividing by zero.\n",
    "        return np.zeros(depth.shape, dtype=np.uint8)\n",
    "    return ((depth - low) * (255 / span)).astype(np.uint8)\n",
    "\n",
    "\n",
    "def img_depth(imgdir, n1, n2=True):\n",
    "    \"\"\"Run the notebook-level MiDaS `model` on a single image file.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    imgdir : str\n",
    "        Path of the image file to open (a file path, despite the name).\n",
    "    n1 : int\n",
    "        Side length in pixels of the square canvas given to the model; the\n",
    "        image is scaled so its longest edge equals `n1` and padded with black.\n",
    "    n2 : bool, default True\n",
    "        If True, the prediction is min-max scaled to 0-255 and mapped through\n",
    "        matplotlib's gray colormap. If False, the prediction is returned as is.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    numpy.ndarray\n",
    "        The colormapped array when `n2` is True, otherwise the squeezed model\n",
    "        prediction converted to NumPy.\n",
    "    \"\"\"\n",
    "    # Close the file handle promptly and convert to RGB up front so palette\n",
    "    # or alpha images are resized in the same colour space as the canvas.\n",
    "    with Image.open(imgdir) as source:\n",
    "        image = source.convert('RGB')\n",
    "\n",
    "    canvas = _letterbox(image, n1)\n",
    "\n",
    "    # NOTE(review): the PyTorch Hub MiDaS example feeds images through the\n",
    "    # hub's own transforms; only ToTensor() is applied here - confirm that\n",
    "    # skipping them is intentional.\n",
    "    input_image = transforms.ToTensor()(canvas).unsqueeze(0).to(device)\n",
    "\n",
    "    with torch.no_grad():\n",
    "        prediction = model(input_image)\n",
    "\n",
    "    depth_array = prediction.squeeze().cpu().numpy()\n",
    "    if n2:\n",
    "        depth_array = cm.gray(_scale_to_uint8(depth_array))\n",
    "\n",
    "    return depth_array\n",
    "\n",
    "\n",
    "def dir_depth(dir1, dir2, n1, n2=True):\n",
    "    \"\"\"Write a depth map for every PNG/JPEG image found in `dir1`.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    dir1 : str\n",
    "        Input directory; only its direct entries are scanned.\n",
    "    dir2 : str\n",
    "        Output directory, created when missing. Each result is saved as\n",
    "        `<input stem>_depth.png`.\n",
    "    n1 : int\n",
    "        Canvas side length forwarded to `img_depth`.\n",
    "    n2 : bool, default True\n",
    "        Forwarded to `img_depth`.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    str\n",
    "        A message stating how many images were converted.\n",
    "    \"\"\"\n",
    "    os.makedirs(dir2, exist_ok=True)\n",
    "\n",
    "    # Matched case-insensitively so files such as 'IMG_1.JPG' are not skipped.\n",
    "    extensions = ('.png', '.jpg', '.jpeg')\n",
    "\n",
    "    count = 0\n",
    "    # Sorted so the processing order does not depend on the file system.\n",
    "    for filename in sorted(os.listdir(dir1)):\n",
    "        if not filename.lower().endswith(extensions):\n",
    "            continue\n",
    "\n",
    "        depth_array = img_depth(os.path.join(dir1, filename), n1, n2)\n",
    "\n",
    "        out_name = os.path.splitext(filename)[0] + '_depth.png'\n",
    "        plt.imsave(os.path.join(dir2, out_name), depth_array)\n",
    "\n",
    "        count += 1\n",
    "\n",
    "    return f'已经转化{count}张图像'\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "midas-depth-batch-md",
   "metadata": {},
   "source": [
    "## Batch conversion"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "2d869e3d-bbcf-497d-9044-7178df0fe4ef",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: total: 16 s\n",
      "Wall time: 20.1 s\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'已经转化72张图像'"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "dir_depth(INPUT_DIR, OUTPUT_DIR, MAX_SIDE, True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "midas-depth-visual-md",
   "metadata": {},
   "source": [
    "## Visual check\n",
    "\n",
    "Shows the raw (un-normalised) prediction for the first image of the input folder."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "29482ccb-e1b2-4cb4-9806-52feaf954121",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The prediction is recomputed here because nothing at notebook level holds\n",
    "# one: inside img_depth it is only a local variable.\n",
    "sample_names = sorted(\n",
    "    name for name in os.listdir(INPUT_DIR)\n",
    "    if name.lower().endswith(('.png', '.jpg', '.jpeg'))\n",
    ")\n",
    "assert sample_names, f'no PNG/JPEG images found in {INPUT_DIR!r}'\n",
    "\n",
    "sample_depth = img_depth(os.path.join(INPUT_DIR, sample_names[0]), MAX_SIDE, n2=False)\n",
    "\n",
    "fig, ax = plt.subplots()\n",
    "ax.imshow(sample_depth, cmap='viridis')\n",
    "ax.set_title(f'MiDaS prediction: {sample_names[0]}')\n",
    "ax.axis('off')\n",
    "plt.show()\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}