{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!pip install transformers pinecone-client tqdm" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The dataset used is the [Unsplash Lite dataset](https://github.com/unsplash/datasets)." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
photo_idphoto_urlphoto_image_urlphoto_submitted_atphoto_featuredphoto_widthphoto_heightphoto_aspect_ratiophoto_descriptionphotographer_username...photo_location_countryphoto_location_citystats_viewsstats_downloadsai_descriptionai_primary_landmark_nameai_primary_landmark_latitudeai_primary_landmark_longitudeai_primary_landmark_confidenceblur_hash
0XMyPniM9LF0https://unsplash.com/photos/XMyPniM9LF0https://images.unsplash.com/uploads/1411949294...2014-09-29 00:08:38.594364t427228481.50Woman exploring a forestmichellespencer77...NaNNaN23754216967woman walking in the middle of forestNaNNaNNaNNaNL56bVcRRIWMh.gVunlS4SMbsRRxr
1rDLBArZUl1chttps://unsplash.com/photos/rDLBArZUl1chttps://images.unsplash.com/photo-141633941111...2014-11-18 19:36:57.08945t300040000.75Succulents in a terrariumugmonk...NaNNaN1378481582141succulent plants in clear glass terrariumNaNNaNNaNNaNLvI$4txu%2s:_4t6WUj]xat7RPoe
2cNDGZ2sQ3Bohttps://unsplash.com/photos/cNDGZ2sQ3Bohttps://images.unsplash.com/photo-142014251503...2015-01-01 20:02:02.097036t256417101.50Rural winter mountainsidejohnprice...NaNNaN13024613428rocky mountain under gray sky at daytimeNaNNaNNaNNaNLhMj%NxvM{t7_4t7aeoM%2M{ozj[
3iuZ_D1eoq9khttps://unsplash.com/photos/iuZ_D1eoq9khttps://images.unsplash.com/photo-141487280988...2014-11-01 20:15:13.410073t291243680.67Poppy seeds and flowerskrisatomic...NaNNaN289023833704red common poppy flower selective focus phographyNaNNaNNaNNaNLSC7DirZAsX7}Br@GEWWmnoLWCnj
4BeD3vjQ8SI0https://unsplash.com/photos/BeD3vjQ8SI0https://images.unsplash.com/photo-141700759404...2014-11-26 13:13:50.134383t489632641.50Silhouette near dark treesjonaseriksson...NaNNaN870486049662trees during night timeNaNNaNNaNNaNL25|_:V@0hxtI=W;odae0ht6=^NG
\n", "

5 rows × 31 columns

\n", "
" ], "text/plain": [ " photo_id photo_url \\\n", "0 XMyPniM9LF0 https://unsplash.com/photos/XMyPniM9LF0 \n", "1 rDLBArZUl1c https://unsplash.com/photos/rDLBArZUl1c \n", "2 cNDGZ2sQ3Bo https://unsplash.com/photos/cNDGZ2sQ3Bo \n", "3 iuZ_D1eoq9k https://unsplash.com/photos/iuZ_D1eoq9k \n", "4 BeD3vjQ8SI0 https://unsplash.com/photos/BeD3vjQ8SI0 \n", "\n", " photo_image_url \\\n", "0 https://images.unsplash.com/uploads/1411949294... \n", "1 https://images.unsplash.com/photo-141633941111... \n", "2 https://images.unsplash.com/photo-142014251503... \n", "3 https://images.unsplash.com/photo-141487280988... \n", "4 https://images.unsplash.com/photo-141700759404... \n", "\n", " photo_submitted_at photo_featured photo_width photo_height \\\n", "0 2014-09-29 00:08:38.594364 t 4272 2848 \n", "1 2014-11-18 19:36:57.08945 t 3000 4000 \n", "2 2015-01-01 20:02:02.097036 t 2564 1710 \n", "3 2014-11-01 20:15:13.410073 t 2912 4368 \n", "4 2014-11-26 13:13:50.134383 t 4896 3264 \n", "\n", " photo_aspect_ratio photo_description photographer_username ... \\\n", "0 1.50 Woman exploring a forest michellespencer77 ... \n", "1 0.75 Succulents in a terrarium ugmonk ... \n", "2 1.50 Rural winter mountainside johnprice ... \n", "3 0.67 Poppy seeds and flowers krisatomic ... \n", "4 1.50 Silhouette near dark trees jonaseriksson ... 
\n", "\n", " photo_location_country photo_location_city stats_views stats_downloads \\\n", "0 NaN NaN 2375421 6967 \n", "1 NaN NaN 13784815 82141 \n", "2 NaN NaN 1302461 3428 \n", "3 NaN NaN 2890238 33704 \n", "4 NaN NaN 8704860 49662 \n", "\n", " ai_description ai_primary_landmark_name \\\n", "0 woman walking in the middle of forest NaN \n", "1 succulent plants in clear glass terrarium NaN \n", "2 rocky mountain under gray sky at daytime NaN \n", "3 red common poppy flower selective focus phography NaN \n", "4 trees during night time NaN \n", "\n", " ai_primary_landmark_latitude ai_primary_landmark_longitude \\\n", "0 NaN NaN \n", "1 NaN NaN \n", "2 NaN NaN \n", "3 NaN NaN \n", "4 NaN NaN \n", "\n", " ai_primary_landmark_confidence blur_hash \n", "0 NaN L56bVcRRIWMh.gVunlS4SMbsRRxr \n", "1 NaN LvI$4txu%2s:_4t6WUj]xat7RPoe \n", "2 NaN LhMj%NxvM{t7_4t7aeoM%2M{ozj[ \n", "3 NaN LSC7DirZAsX7}Br@GEWWmnoLWCnj \n", "4 NaN L25|_:V@0hxtI=W;odae0ht6=^NG \n", "\n", "[5 rows x 31 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "images = pd.read_csv('photos.tsv000', delimiter='\\t')\n", "images.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We download using the `photo_image_url` field." 
] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "from PIL import Image\n", "import requests\n", "from io import BytesIO\n", "\n", "url = images['photo_image_url'].iloc[0]\n", "\n", "response = requests.get(url)\n", "img = Image.open(BytesIO(response.content))\n", "img" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We need to use these images to create vector embeddings. To do this, we will use OpenAI's CLIP from the `transformers` library.\n", "\n", "```\n", "!pip install transformers\n", "```" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2022-08-12 14:07:47.935784: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", "ftfy or spacy is not installed using BERT BasicTokenizer instead of ftfy.\n" ] } ], "source": [ "from transformers import CLIPProcessor, CLIPModel\n", "import torch\n", "\n", "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", "model_name = \"openai/clip-vit-base-patch32\"\n", "\n", "model = CLIPModel.from_pretrained(model_name).to(device)\n", "processor = CLIPProcessor.from_pretrained(model_name)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now we're ready to use the vision transformer (ViT) portion of CLIP to create feature vectors (embedding representations) from the image." 
] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "img = processor(\n", " text=None,\n", " images=img,\n", " return_tensors='pt',\n", " padding=True\n", ")['pixel_values'].to(device)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([1, 512])" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "out = model.get_image_features(pixel_values=img)\n", "out.shape" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([512])" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "out = out.squeeze(0)\n", "out.shape" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(512,)" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "emb = out.cpu().detach().numpy()\n", "emb.shape" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(-7.985501, 2.0108054)" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "emb.min(), emb.max()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now we have a single `512`-dimensional vector that represents the *meaning* behind the image. As we will be using dot product similarity, we should also normalize these vectors to unit length, so that the dot product equals cosine similarity." 
] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "\n", "emb = emb / np.linalg.norm(emb)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(-0.56626415, 0.13343191)" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "emb.min(), emb.max()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Indexing\n", "\n", "To index this image in Pinecone we first install the Pinecone client:\n", "\n", "```\n", "!pip install pinecone-client\n", "```\n", "\n", "We then initialize our connection to Pinecone; this requires a [free API key](https://app.pinecone.io/)." ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "import pinecone\n", "\n", "index_name = \"unsplash-25k-clip\"\n", "\n", "pinecone.init(\n", " api_key=\"<>\",\n", " environment=\"us-west1-gcp\"\n", ")\n", "\n", "if index_name not in pinecone.list_indexes():\n", " pinecone.create_index(\n", " index_name,\n", " emb.shape[0],\n", " metric=\"dotproduct\"\n", " )\n", "# connect to the index\n", "index = pinecone.Index(index_name)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To upsert the single feature embedding we have created, we use `upsert`. There is also some potentially relevant metadata we might want to add." 
] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'photo_url': 'https://unsplash.com/photos/XMyPniM9LF0',\n", " 'photo_image_url': 'https://images.unsplash.com/uploads/14119492946973137ce46/f1f2ebf3',\n", " 'photo_submitted_at': '2014-09-29 00:08:38.594364',\n", " 'photo_description': 'Woman exploring a forest',\n", " 'photographer_username': 'michellespencer77',\n", " 'ai_description': 'woman walking in the middle of forest'}" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "row = images.iloc[0]\n", "\n", "_id = row['photo_id']\n", "meta = {\n", " \"photo_url\": row[\"photo_url\"],\n", " \"photo_image_url\": row[\"photo_image_url\"],\n", " \"photo_submitted_at\": row[\"photo_submitted_at\"],\n", " \"photo_description\": row[\"photo_description\"],\n", " \"photographer_username\": row[\"photographer_username\"],\n", " \"ai_description\": row[\"ai_description\"]\n", "}\n", "\n", "meta" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'upserted_count': 1}" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "to_upsert = [(_id, emb.tolist(), meta)]\n", "\n", "index.upsert(to_upsert)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'XMyPniM9LF0'" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "_id" ] }, { "cell_type": "markdown", "metadata": { "tags": [] }, "source": [ "Note that we added a string ID value `\"XMyPniM9LF0\"` and also converted the feature embedding array to a flat list before adding it to our Pinecone index.\n", "\n", "## Indexing Everything\n", "\n", "So far we've built one feature embedding and indexed it in Pinecone. Now let's repeat the process for a lot of images.\n", "\n", "We will do this in batches, taking `32` images at a time, embedding 
them with CLIP, and indexing them in Pinecone." ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "import numpy as np" ] }, { "cell_type": "code", "execution_count": 79, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "6726c0eb47de4cd780f3e1096a2d743f", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/1370 [00:00