Spaces:

ThinkAI-Morocco
/

artigen

Build error

App Files Files Community

CallmeKaito committed on May 19, 2024

Commit

649121a

verified ·

1 Parent(s): 6903ffe

Upload 10 files

Browse files

Files changed (10) hide show

data/.DS_Store +0 -0
data/10106922982.jpeg +0 -0
data/10111325994.jpeg +0 -0
data/10113394119.jpeg +0 -0
data/10119695953.jpeg +0 -0
data/thuya.jpeg +0 -0
notebooks/.DS_Store +0 -0
notebooks/CLIP (3).ipynb +836 -0
notebooks/LLaVa (1).ipynb +0 -0
notebooks/SAM (1).ipynb +0 -0

data/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

data/10106922982.jpeg ADDED Viewed

data/10111325994.jpeg ADDED Viewed

data/10113394119.jpeg ADDED Viewed

data/10119695953.jpeg ADDED Viewed

data/thuya.jpeg ADDED Viewed

notebooks/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

notebooks/CLIP (3).ipynb ADDED Viewed

	@@ -0,0 +1,836 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "sYaX1Rf8pCWN",
+    "outputId": "f52aaf57-323d-46ff-908f-f188525b830a",
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Collecting ftfy\n",
+      "  Downloading ftfy-6.2.0-py3-none-any.whl (54 kB)\n",
+      "\u001b[K     |████████████████████████████████| 54 kB 3.5 MB/s eta 0:00:011\n",
+      "\u001b[?25hCollecting regex\n",
+      "  Downloading regex-2024.5.15-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (774 kB)\n",
+      "\u001b[K     |████████████████████████████████| 774 kB 4.9 MB/s eta 0:00:01\n",
+      "\u001b[?25hRequirement already satisfied: tqdm in /home/user/miniconda/lib/python3.9/site-packages (4.61.2)\n",
+      "Requirement already satisfied: wcwidth<0.3.0,>=0.2.12 in /home/user/miniconda/lib/python3.9/site-packages (from ftfy) (0.2.13)\n",
+      "Installing collected packages: regex, ftfy\n",
+      "Successfully installed ftfy-6.2.0 regex-2024.5.15\n",
+      "Collecting git+https://github.com/openai/CLIP.git\n",
+      "  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-7h9f8ksf\n",
+      "  Running command git clone -q https://github.com/openai/CLIP.git /tmp/pip-req-build-7h9f8ksf\n",
+      "Requirement already satisfied: ftfy in /home/user/miniconda/lib/python3.9/site-packages (from clip==1.0) (6.2.0)\n",
+      "Requirement already satisfied: regex in /home/user/miniconda/lib/python3.9/site-packages (from clip==1.0) (2024.5.15)\n",
+      "Requirement already satisfied: tqdm in /home/user/miniconda/lib/python3.9/site-packages (from clip==1.0) (4.61.2)\n",
+      "Collecting torch\n",
+      "  Downloading torch-2.3.0-cp39-cp39-manylinux1_x86_64.whl (779.1 MB)\n",
+      "\u001b[K     |█████████████▎                  | 322.4 MB 155.1 MB/s eta 0:00:03"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "IOPub data rate exceeded.\n",
+      "The Jupyter server will temporarily stop sending output\n",
+      "to the client in order to avoid crashing it.\n",
+      "To change this limit, set the config variable\n",
+      "`--ServerApp.iopub_data_rate_limit`.\n",
+      "\n",
+      "Current values:\n",
+      "ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)\n",
+      "ServerApp.rate_limit_window=3.0 (secs)\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[K     |█████████████████████████████▉  | 726.2 MB 140.6 MB/s eta 0:00:01"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "IOPub data rate exceeded.\n",
+      "The Jupyter server will temporarily stop sending output\n",
+      "to the client in order to avoid crashing it.\n",
+      "To change this limit, set the config variable\n",
+      "`--ServerApp.iopub_data_rate_limit`.\n",
+      "\n",
+      "Current values:\n",
+      "ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)\n",
+      "ServerApp.rate_limit_window=3.0 (secs)\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[K     |████████████████████████████████| 779.1 MB 39 kB/s \n",
+      "\u001b[?25hCollecting torchvision\n",
+      "  Downloading torchvision-0.18.0-cp39-cp39-manylinux1_x86_64.whl (7.0 MB)\n",
+      "\u001b[K     |████████████████████████████████| 7.0 MB 117.1 MB/s eta 0:00:01\n",
+      "\u001b[?25hRequirement already satisfied: wcwidth<0.3.0,>=0.2.12 in /home/user/miniconda/lib/python3.9/site-packages (from ftfy->clip==1.0) (0.2.13)\n",
+      "Collecting filelock\n",
+      "  Downloading filelock-3.14.0-py3-none-any.whl (12 kB)\n",
+      "Requirement already satisfied: jinja2 in /home/user/miniconda/lib/python3.9/site-packages (from torch->clip==1.0) (3.1.4)\n",
+      "Collecting nvidia-cuda-nvrtc-cu12==12.1.105\n",
+      "  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n",
+      "\u001b[K     |████████████████████████████████| 23.7 MB 111.3 MB/s eta 0:00:01\n",
+      "\u001b[?25hCollecting nvidia-cudnn-cu12==8.9.2.26\n",
+      "  Downloading nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n",
+      "\u001b[K     |████████████▎                   | 281.1 MB 157.5 MB/s eta 0:00:03"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "IOPub data rate exceeded.\n",
+      "The Jupyter server will temporarily stop sending output\n",
+      "to the client in order to avoid crashing it.\n",
+      "To change this limit, set the config variable\n",
+      "`--ServerApp.iopub_data_rate_limit`.\n",
+      "\n",
+      "Current values:\n",
+      "ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)\n",
+      "ServerApp.rate_limit_window=3.0 (secs)\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[K     |██████████████████████████████  | 687.7 MB 121.2 MB/s eta 0:00:01"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "IOPub data rate exceeded.\n",
+      "The Jupyter server will temporarily stop sending output\n",
+      "to the client in order to avoid crashing it.\n",
+      "To change this limit, set the config variable\n",
+      "`--ServerApp.iopub_data_rate_limit`.\n",
+      "\n",
+      "Current values:\n",
+      "ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)\n",
+      "ServerApp.rate_limit_window=3.0 (secs)\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[K     |████████████████████████████████| 731.7 MB 27 kB/s \n",
+      "\u001b[?25hCollecting triton==2.3.0\n",
+      "  Downloading triton-2.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (168.1 MB)\n",
+      "\u001b[K     |████████████████████████████████| 168.1 MB 163.1 MB/s eta 0:00:01\n",
+      "\u001b[?25hCollecting nvidia-nccl-cu12==2.20.5\n",
+      "  Downloading nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl (176.2 MB)\n",
+      "\u001b[K     |████████████████████████████████| 176.2 MB 157 kB/s s eta 0:00:01\n",
+      "\u001b[?25hCollecting nvidia-cublas-cu12==12.1.3.1\n",
+      "  Downloading nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n",
+      "\u001b[K     |██████████████████████▊         | 291.1 MB 155.6 MB/s eta 0:00:01"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "IOPub data rate exceeded.\n",
+      "The Jupyter server will temporarily stop sending output\n",
+      "to the client in order to avoid crashing it.\n",
+      "To change this limit, set the config variable\n",
+      "`--ServerApp.iopub_data_rate_limit`.\n",
+      "\n",
+      "Current values:\n",
+      "ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)\n",
+      "ServerApp.rate_limit_window=3.0 (secs)\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[K     |████████████████████████████████| 410.6 MB 11 kB/s /s eta 0:00:01\n",
+      "\u001b[?25hCollecting nvidia-curand-cu12==10.3.2.106\n",
+      "  Downloading nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n",
+      "\u001b[K     |████████████████████████████████| 56.5 MB 125.6 MB/s eta 0:00:01███████████████████▉            | 35.0 MB 125.6 MB/s eta 0:00:01\n",
+      "\u001b[?25hRequirement already satisfied: typing-extensions>=4.8.0 in /home/user/miniconda/lib/python3.9/site-packages (from torch->clip==1.0) (4.11.0)\n",
+      "Collecting nvidia-cusolver-cu12==11.4.5.107\n",
+      "  Downloading nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n",
+      "\u001b[K     |████████████████████████████████| 124.2 MB 144.5 MB/s eta 0:00:01\n",
+      "\u001b[?25hCollecting sympy\n",
+      "  Downloading sympy-1.12-py3-none-any.whl (5.7 MB)\n",
+      "\u001b[K     |████████████████████████████████| 5.7 MB 109.2 MB/s eta 0:00:01\n",
+      "\u001b[?25hCollecting fsspec\n",
+      "  Downloading fsspec-2024.5.0-py3-none-any.whl (316 kB)\n",
+      "\u001b[K     |████████████████████████████████| 316 kB 119.1 MB/s eta 0:00:01\n",
+      "\u001b[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105\n",
+      "  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n",
+      "\u001b[K     |████████████████████████████████| 823 kB 119.5 MB/s eta 0:00:01\n",
+      "\u001b[?25hCollecting nvidia-cuda-cupti-cu12==12.1.105\n",
+      "  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n",
+      "\u001b[K     |████████████████████████████████| 14.1 MB 126.1 MB/s eta 0:00:01\n",
+      "\u001b[?25hCollecting nvidia-cufft-cu12==11.0.2.54\n",
+      "  Downloading nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n",
+      "\u001b[K     |████████████████████████████████| 121.6 MB 4.8 MB/s eta 0:00:011\n",
+      "\u001b[?25hCollecting networkx\n",
+      "  Downloading networkx-3.2.1-py3-none-any.whl (1.6 MB)\n",
+      "\u001b[K     |████████████████████████████████| 1.6 MB 112.8 MB/s eta 0:00:01\n",
+      "\u001b[?25hCollecting nvidia-cusparse-cu12==12.1.0.106\n",
+      "  Downloading nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n",
+      "\u001b[K     |████████████████████████████████| 196.0 MB 154.4 MB/s eta 0:00:01\n",
+      "\u001b[?25hCollecting nvidia-nvtx-cu12==12.1.105\n",
+      "  Downloading nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n",
+      "\u001b[K     |████████████████████████████████| 99 kB 39.0 MB/s  eta 0:00:01\n",
+      "\u001b[?25hCollecting nvidia-nvjitlink-cu12\n",
+      "  Downloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n",
+      "\u001b[K     |████████████████████████████████| 21.1 MB 123.7 MB/s eta 0:00:01\n",
+      "\u001b[?25hRequirement already satisfied: MarkupSafe>=2.0 in /home/user/miniconda/lib/python3.9/site-packages (from jinja2->torch->clip==1.0) (2.1.5)\n",
+      "Collecting mpmath>=0.19\n",
+      "  Downloading mpmath-1.3.0-py3-none-any.whl (536 kB)\n",
+      "\u001b[K     |████████████████████████████████| 536 kB 125.5 MB/s eta 0:00:01\n",
+      "\u001b[?25hCollecting pillow!=8.3.*,>=5.3.0\n",
+      "  Downloading pillow-10.3.0-cp39-cp39-manylinux_2_28_x86_64.whl (4.5 MB)\n",
+      "\u001b[K     |████████████████████████████████| 4.5 MB 123.5 MB/s eta 0:00:01\n",
+      "\u001b[?25hCollecting numpy\n",
+      "  Downloading numpy-1.26.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.2 MB)\n",
+      "\u001b[K     |████████████████████████████████| 18.2 MB 113.2 MB/s eta 0:00:01     | 1.1 MB 113.2 MB/s eta 0:00:01\n",
+      "\u001b[?25hBuilding wheels for collected packages: clip\n",
+      "  Building wheel for clip (setup.py) ... \u001b[?25ldone\n",
+      "\u001b[?25h  Created wheel for clip: filename=clip-1.0-py3-none-any.whl size=1369525 sha256=2d16eeced15e3729c52334f9be57fd2ddca900110e745c1af86ab5aade88cd62\n",
+      "  Stored in directory: /tmp/pip-ephem-wheel-cache-8vr04co8/wheels/c8/e4/e1/11374c111387672fc2068dfbe0d4b424cb9cdd1b2e184a71b5\n",
+      "Successfully built clip\n",
+      "Installing collected packages: nvidia-nvjitlink-cu12, nvidia-cusparse-cu12, nvidia-cublas-cu12, mpmath, filelock, triton, sympy, nvidia-nvtx-cu12, nvidia-nccl-cu12, nvidia-cusolver-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cudnn-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, networkx, fsspec, torch, pillow, numpy, torchvision, clip\n",
+      "Successfully installed clip-1.0 filelock-3.14.0 fsspec-2024.5.0 mpmath-1.3.0 networkx-3.2.1 numpy-1.26.4 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.4.127 nvidia-nvtx-cu12-12.1.105 pillow-10.3.0 sympy-1.12 torch-2.3.0 torchvision-0.18.0 triton-2.3.0\n",
+      "\u001b[33mWARNING: Requirement 'sentencepiece-0.1.98-cp311-cp311-win_amd64.whl' looks like a filename, but the file does not exist\u001b[0m\n",
+      "\u001b[31mERROR: sentencepiece-0.1.98-cp311-cp311-win_amd64.whl is not a supported wheel on this platform.\u001b[0m\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Install CLIP and its text-processing dependencies; %pip targets the running kernel's environment.\n",
+    "%pip install ftfy regex tqdm\n",
+    "%pip install git+https://github.com/openai/CLIP.git\n",
+    "# Install sentencepiece from PyPI so pip selects a wheel for this platform; the previous\n",
+    "# hard-coded Windows/cp311 wheel filename failed on this Linux / Python 3.9 kernel.\n",
+    "%pip install sentencepiece\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "Zuat0Supqs7r",
+    "outputId": "f3ec0a32-0d58-4241-d3f2-621828297c43",
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Collecting transformers\n",
+      "  Downloading transformers-4.41.0-py3-none-any.whl (9.1 MB)\n",
+      "\u001b[K     |████████████████████████████████| 9.1 MB 4.3 MB/s eta 0:00:01\n",
+      "\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /home/user/miniconda/lib/python3.9/site-packages (from transformers) (4.61.2)\n",
+      "Collecting tokenizers<0.20,>=0.19\n",
+      "  Downloading tokenizers-0.19.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)\n",
+      "\u001b[K     |████████████████████████████████| 3.6 MB 104.9 MB/s eta 0:00:01\n",
+      "\u001b[?25hRequirement already satisfied: pyyaml>=5.1 in /home/user/miniconda/lib/python3.9/site-packages (from transformers) (6.0.1)\n",
+      "Requirement already satisfied: filelock in /home/user/miniconda/lib/python3.9/site-packages (from transformers) (3.14.0)\n",
+      "Requirement already satisfied: numpy>=1.17 in /home/user/miniconda/lib/python3.9/site-packages (from transformers) (1.26.4)\n",
+      "Collecting huggingface-hub<1.0,>=0.23.0\n",
+      "  Downloading huggingface_hub-0.23.0-py3-none-any.whl (401 kB)\n",
+      "\u001b[K     |████████████████████████████████| 401 kB 120.0 MB/s eta 0:00:01\n",
+      "\u001b[?25hRequirement already satisfied: packaging>=20.0 in /home/user/miniconda/lib/python3.9/site-packages (from transformers) (24.0)\n",
+      "Requirement already satisfied: regex!=2019.12.17 in /home/user/miniconda/lib/python3.9/site-packages (from transformers) (2024.5.15)\n",
+      "Requirement already satisfied: requests in /home/user/miniconda/lib/python3.9/site-packages (from transformers) (2.31.0)\n",
+      "Collecting safetensors>=0.4.1\n",
+      "  Downloading safetensors-0.4.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n",
+      "\u001b[K     |████████████████████████████████| 1.2 MB 95.0 MB/s eta 0:00:01\n",
+      "\u001b[?25hRequirement already satisfied: typing-extensions>=3.7.4.3 in /home/user/miniconda/lib/python3.9/site-packages (from huggingface-hub<1.0,>=0.23.0->transformers) (4.11.0)\n",
+      "Requirement already satisfied: fsspec>=2023.5.0 in /home/user/miniconda/lib/python3.9/site-packages (from huggingface-hub<1.0,>=0.23.0->transformers) (2024.5.0)\n",
+      "Requirement already satisfied: certifi>=2017.4.17 in /home/user/miniconda/lib/python3.9/site-packages (from requests->transformers) (2021.5.30)\n",
+      "Requirement already satisfied: charset-normalizer<4,>=2 in /home/user/miniconda/lib/python3.9/site-packages (from requests->transformers) (3.3.2)\n",
+      "Requirement already satisfied: idna<4,>=2.5 in /home/user/miniconda/lib/python3.9/site-packages (from requests->transformers) (2.10)\n",
+      "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/user/miniconda/lib/python3.9/site-packages (from requests->transformers) (1.26.6)\n",
+      "Installing collected packages: huggingface-hub, tokenizers, safetensors, transformers\n",
+      "Successfully installed huggingface-hub-0.23.0 safetensors-0.4.3 tokenizers-0.19.1 transformers-4.41.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Install Hugging Face Transformers, pinned to the release recorded in this notebook's output.\n",
+    "%pip install transformers==4.41.0\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "id": "8xOP6veIq5LM",
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "application/json": {
+       "ascii": false,
+       "bar_format": null,
+       "colour": null,
+       "elapsed": 0.0066907405853271484,
+       "initial": 0,
+       "n": 0,
+       "ncols": null,
+       "nrows": null,
+       "postfix": null,
+       "prefix": "preprocessor_config.json",
+       "rate": null,
+       "total": 228,
+       "unit": "B",
+       "unit_divisor": 1000,
+       "unit_scale": true
+      },
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "d43500a3f8b1440baaaf1337fd547030",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "preprocessor_config.json:   0%|          | 0.00/228 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/user/miniconda/lib/python3.9/site-packages/transformers/models/vit/feature_extraction_vit.py:28: FutureWarning: The class ViTFeatureExtractor is deprecated and will be removed in version 5 of Transformers. Please use ViTImageProcessor instead.\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "data": {
+      "application/json": {
+       "ascii": false,
+       "bar_format": null,
+       "colour": null,
+       "elapsed": 0.004696846008300781,
+       "initial": 0,
+       "n": 0,
+       "ncols": null,
+       "nrows": null,
+       "postfix": null,
+       "prefix": "tokenizer_config.json",
+       "rate": null,
+       "total": 241,
+       "unit": "B",
+       "unit_divisor": 1000,
+       "unit_scale": true
+      },
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "bf4f06b628644ec8a638e5f32bd00324",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "tokenizer_config.json:   0%|          | 0.00/241 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/json": {
+       "ascii": false,
+       "bar_format": null,
+       "colour": null,
+       "elapsed": 0.004175662994384766,
+       "initial": 0,
+       "n": 0,
+       "ncols": null,
+       "nrows": null,
+       "postfix": null,
+       "prefix": "vocab.json",
+       "rate": null,
+       "total": 798156,
+       "unit": "B",
+       "unit_divisor": 1000,
+       "unit_scale": true
+      },
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "ffc926da2aa540f2a1760c3bb4fb4909",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/json": {
+       "ascii": false,
+       "bar_format": null,
+       "colour": null,
+       "elapsed": 0.004157304763793945,
+       "initial": 0,
+       "n": 0,
+       "ncols": null,
+       "nrows": null,
+       "postfix": null,
+       "prefix": "merges.txt",
+       "rate": null,
+       "total": 456356,
+       "unit": "B",
+       "unit_divisor": 1000,
+       "unit_scale": true
+      },
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "302ae34c419d484a9b16e025d6d2690b",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/json": {
+       "ascii": false,
+       "bar_format": null,
+       "colour": null,
+       "elapsed": 0.004187107086181641,
+       "initial": 0,
+       "n": 0,
+       "ncols": null,
+       "nrows": null,
+       "postfix": null,
+       "prefix": "tokenizer.json",
+       "rate": null,
+       "total": 1355446,
+       "unit": "B",
+       "unit_divisor": 1000,
+       "unit_scale": true
+      },
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "de2f6cacd09a43c98c06cf4e4243c7c7",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/json": {
+       "ascii": false,
+       "bar_format": null,
+       "colour": null,
+       "elapsed": 0.004050254821777344,
+       "initial": 0,
+       "n": 0,
+       "ncols": null,
+       "nrows": null,
+       "postfix": null,
+       "prefix": "special_tokens_map.json",
+       "rate": null,
+       "total": 120,
+       "unit": "B",
+       "unit_divisor": 1000,
+       "unit_scale": true
+      },
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "c4921bf4d08d4156a1904fabe261235c",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "special_tokens_map.json:   0%|          | 0.00/120 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/json": {
+       "ascii": false,
+       "bar_format": null,
+       "colour": null,
+       "elapsed": 0.004579067230224609,
+       "initial": 0,
+       "n": 0,
+       "ncols": null,
+       "nrows": null,
+       "postfix": null,
+       "prefix": "config.json",
+       "rate": null,
+       "total": 4609,
+       "unit": "B",
+       "unit_divisor": 1000,
+       "unit_scale": true
+      },
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "2c6081497e1542ab9f86e1f763a46101",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "config.json:   0%|          | 0.00/4.61k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/json": {
+       "ascii": false,
+       "bar_format": null,
+       "colour": null,
+       "elapsed": 0.0045909881591796875,
+       "initial": 0,
+       "n": 0,
+       "ncols": null,
+       "nrows": null,
+       "postfix": null,
+       "prefix": "pytorch_model.bin",
+       "rate": null,
+       "total": 982141993,
+       "unit": "B",
+       "unit_divisor": 1000,
+       "unit_scale": true
+      },
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "eafcdd2e978a42659bef0a50f82a7055",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "pytorch_model.bin:   0%|          | 0.00/982M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "from transformers import AutoTokenizer, ViTImageProcessor, VisionEncoderDecoderModel\n",
+    "\n",
+    "# Image-captioning checkpoint shared by the image processor, tokenizer and model.\n",
+    "CAPTION_CHECKPOINT = \"nlpconnect/vit-gpt2-image-captioning\"\n",
+    "\n",
+    "# ViTImageProcessor replaces ViTFeatureExtractor, which is deprecated and scheduled for\n",
+    "# removal in Transformers v5. The variable name is kept so later cells keep working.\n",
+    "feature_extractor = ViTImageProcessor.from_pretrained(CAPTION_CHECKPOINT)\n",
+    "tokenizer = AutoTokenizer.from_pretrained(CAPTION_CHECKPOINT)\n",
+    "model = VisionEncoderDecoderModel.from_pretrained(CAPTION_CHECKPOINT)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "uYLlkIWgqGwX"
+   },
+   "source": [
+    "## Import the necessary libraries and load the CLIP model:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "id": "dLxPnrUQqDZU",
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|███████████████████████████████████████| 338M/338M [00:12<00:00, 28.0MiB/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "import clip\n",
+    "from PIL import Image\n",
+    "\n",
+    "# Run on the GPU when CUDA is available, otherwise fall back to the CPU.\n",
+    "if torch.cuda.is_available():\n",
+    "    device = \"cuda\"\n",
+    "else:\n",
+    "    device = \"cpu\"\n",
+    "\n",
+    "# clip.load returns the model together with its matching image preprocessing transform.\n",
+    "clip_model, preprocess = clip.load(\"ViT-B/32\", device=device)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "Gt1Q-d1iqM9F"
+   },
+   "source": [
+    "## Generate a caption for a product image:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "id": "u2XdvaffqGMr",
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.\n",
+      "You may ignore this warning if your `pad_token_id` (50256) is identical to the `bos_token_id` (50256), `eos_token_id` (50256), or the `sep_token_id` (None), and your input is not padded.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Convert to RGB so grayscale, palette or RGBA files still give the 3-channel input the processor expects.\n",
+    "image = Image.open(\"data/download.jpeg\").convert(\"RGB\")\n",
+    "pixel_values = feature_extractor(images=image, return_tensors=\"pt\").pixel_values\n",
+    "# Beam search with 4 beams, stopping early once the beams have finished; output capped at 50 tokens.\n",
+    "output_ids = model.generate(pixel_values, max_length=50, num_beams=4, early_stopping=True)\n",
+    "# Decode the generated token ids to text, dropping special tokens.\n",
+    "captions = tokenizer.batch_decode(output_ids, skip_special_tokens=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "lOf9lcUAqVlm",
+    "outputId": "d00cdc05-6652-4fba-b40c-03ad803d54e3",
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a vase sitting on top of a table \n"
+     ]
+    }
+   ],
+   "source": [
+    "# Bind the preprocessed tensor to a new name: overwriting `image` (a PIL image) with a tensor\n",
+    "# made this cell fail when it was run a second time.\n",
+    "image_tensor = preprocess(image).unsqueeze(0).to(device)\n",
+    "# clip.tokenize accepts a list of strings; truncate=True avoids an error on captions that\n",
+    "# exceed CLIP's context length.\n",
+    "text_inputs = clip.tokenize(captions, truncate=True).to(device)\n",
+    "\n",
+    "with torch.no_grad():\n",
+    "    image_features = clip_model.encode_image(image_tensor)\n",
+    "    text_features = clip_model.encode_text(text_inputs)\n",
+    "    # L2-normalise so the dot product is a cosine similarity rather than being skewed by embedding norms.\n",
+    "    image_features = image_features / image_features.norm(dim=-1, keepdim=True)\n",
+    "    text_features = text_features / text_features.norm(dim=-1, keepdim=True)\n",
+    "    similarity_scores = image_features @ text_features.T\n",
+    "\n",
+    "# There is a single image row, so the flat argmax is the index of the best-matching caption.\n",
+    "best_caption_idx = similarity_scores.argmax().item()\n",
+    "product_description = captions[best_caption_idx]\n",
+    "print(product_description)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "RM6RXXvT4xSN"
+   },
+   "source": [
+    "# Using SigLIP"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Collecting protobuf\n",
+      "  Downloading protobuf-5.26.1-cp37-abi3-manylinux2014_x86_64.whl (302 kB)\n",
+      "\u001b[K     |████████████████████████████████| 302 kB 4.3 MB/s eta 0:00:01\n",
+      "\u001b[?25hInstalling collected packages: protobuf\n",
+      "Successfully installed protobuf-5.26.1\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Extra packages installed ahead of the SigLIP section; %pip targets the running kernel's environment.\n",
+    "%pip install sentencepiece protobuf"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "fR9c1mv3qXGz",
+    "outputId": "5b222c53-e0f8-4545-f191-ad6a90ab1373",
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/user/miniconda/lib/python3.9/site-packages/transformers/models/vit/feature_extraction_vit.py:28: FutureWarning: The class ViTFeatureExtractor is deprecated and will be removed in version 5 of Transformers. Please use ViTImageProcessor instead.\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "an old fashioned clock sitting on top of a table \n"
+     ]
+    }
+   ],
+   "source": [
+    "from transformers import AutoProcessor, AutoModel, VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer\n",
+    "import torch\n",
+    "from PIL import Image\n",
+    "\n",
+    "SIGLIP_CHECKPOINT = \"google/siglip-base-patch16-224\"\n",
+    "CAPTION_CHECKPOINT = \"nlpconnect/vit-gpt2-image-captioning\"\n",
+    "\n",
+    "# SigLIP gets its own names: previously it was loaded into `model`, immediately overwritten by the\n",
+    "# captioning model, and the scoring below silently used CLIP objects from earlier cells instead.\n",
+    "siglip_model = AutoModel.from_pretrained(SIGLIP_CHECKPOINT)\n",
+    "siglip_processor = AutoProcessor.from_pretrained(SIGLIP_CHECKPOINT)\n",
+    "\n",
+    "# ViTImageProcessor replaces the deprecated ViTFeatureExtractor.\n",
+    "feature_extractor = ViTImageProcessor.from_pretrained(CAPTION_CHECKPOINT)\n",
+    "tokenizer = AutoTokenizer.from_pretrained(CAPTION_CHECKPOINT)\n",
+    "model = VisionEncoderDecoderModel.from_pretrained(CAPTION_CHECKPOINT)\n",
+    "\n",
+    "# Convert to RGB so non-RGB files still give the 3-channel input both processors expect.\n",
+    "image = Image.open(\"data/avito4.jpeg\").convert(\"RGB\")\n",
+    "\n",
+    "# Step 1: generate candidate caption(s) with the ViT + GPT-2 captioning model (beam search, 5 beams).\n",
+    "pixel_values = feature_extractor(images=image, return_tensors=\"pt\").pixel_values\n",
+    "output_ids = model.generate(pixel_values, max_length=100, num_beams=5, early_stopping=True)\n",
+    "captions = tokenizer.batch_decode(output_ids, skip_special_tokens=True)\n",
+    "\n",
+    "# Step 2: score every caption against the image with SigLIP.\n",
+    "# padding=\"max_length\" follows the SigLIP usage documented by Transformers; truncation guards long captions.\n",
+    "inputs = siglip_processor(\n",
+    "    text=captions, images=image, padding=\"max_length\", truncation=True, return_tensors=\"pt\"\n",
+    ")\n",
+    "with torch.no_grad():\n",
+    "    similarity_scores = siglip_model(**inputs).logits_per_image\n",
+    "\n",
+    "# There is a single image row, so the flat argmax is the index of the best-matching caption.\n",
+    "best_caption_idx = similarity_scores.argmax().item()\n",
+    "product_description = captions[best_caption_idx]\n",
+    "print(product_description)\n",
+    "\n",
+    "# Captions noted from earlier runs (caption => image name):\n",
+    "# a vase sitting on a shelf in a store => thuya\n",
+    "# a wooden bench sitting on top of a wooden floor => avito\n",
+    "# two old fashioned vases sitting next to each other => avito2\n",
+    "# three wooden vases sitting on top of a wooden floor => avito3\n",
+    "# an old fashioned clock sitting on top of a table => avito4\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "fR9c1mv3qXGz",
+    "outputId": "5b222c53-e0f8-4545-f191-ad6a90ab1373",
+    "tags": []
+   },
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "qRkGmKyYB7DM"
+   },
+   "source": [
+    "# Implementing LLaVA"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "u6jq8q__zoOt"
+   },
+   "source": [
+    "https://colab.research.google.com/drive/1veefV17NcD1S4ou4nF8ABkfm8-TgU0Dr#scrollTo=XN2vJCPZk1UY"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "QyO2UcBjzl71"
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

notebooks/LLaVa (1).ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

notebooks/SAM (1).ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff