{ "cells": [ { "cell_type": "code", "execution_count": 28, "metadata": { "tags": [] }, "outputs": [ { "ename": "ImportError", "evalue": "cannot import name 'data_path' from 'utils' (/Users/yonglinwu/dev/image-search-playground/utils.py)", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[28], line 9\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mtorch\u001b[39;00m\n\u001b[1;32m 7\u001b[0m torch\u001b[39m.\u001b[39mset_printoptions(precision\u001b[39m=\u001b[39m\u001b[39m10\u001b[39m)\n\u001b[0;32m----> 9\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mutils\u001b[39;00m \u001b[39mimport\u001b[39;00m get_image_embeddings, model_name_to_ids, load_models, model_dict, data_path\n\u001b[1;32m 11\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mwarnings\u001b[39;00m\n\u001b[1;32m 12\u001b[0m warnings\u001b[39m.\u001b[39msimplefilter(action\u001b[39m=\u001b[39m\u001b[39m'\u001b[39m\u001b[39mignore\u001b[39m\u001b[39m'\u001b[39m, category\u001b[39m=\u001b[39m\u001b[39mFutureWarning\u001b[39;00m)\n", "\u001b[0;31mImportError\u001b[0m: cannot import name 'data_path' from 'utils' (/Users/yonglinwu/dev/image-search-playground/utils.py)" ] } ], "source": [ "from sentence_transformers import SentenceTransformer, util\n", "from PIL import Image\n", "import pandas as pd\n", "import os\n", "import numpy as np\n", "import torch\n", "torch.set_printoptions(precision=10)\n", "\n", "from utils import get_image_embeddings, model_name_to_ids, load_models, model_dict, data_path\n", "\n", "import warnings\n", "warnings.simplefilter(action='ignore', category=FutureWarning)\n", "\n", "%load_ext autoreload\n", "%autoreload 2\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 3, "metadata": { "tags": [] }, "outputs": [], "source": [ "patagonia_df = pd.read_csv(data_path + 'metadata/patagonia_losGatos.tsv', sep='\\t')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
brandtitleproduct_urlpricedescriptionsizecategorycolorsPoshmarkUnnamed: 9...Unnamed: 38Unnamed: 39Unnamed: 40Unnamed: 41Unnamed: 42Unnamed: 43Unnamed: 44Unnamed: 45Unnamed: 46Unnamed: 47
0PatagoniaPatagonia Women's Los Gatos Fleece 1/4-Zip Smo...https://poshmark.com/listing/63d4821f2fbf1afe8...$36.00A soft, warm and versatile quarter-zip pullove...MTops[{'name': 'Gray', 'rgb': '#929292', 'message_i...PoshmarkFalse...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1PatagoniaPatagonia Los Gatos 1/4 Zip Pullover M Beech B...https://poshmark.com/listing/63fcd7709f212bd48...$59.00High pile, quarter zip pulllover\\nMeasurements...MTops[{'name': 'Brown', 'rgb': '#663509', 'message_...PoshmarkFalse...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
2PatagoniaPATAGONIA Women's Los Gatos Fleece 1/4-Zip Pul...https://poshmark.com/listing/642b9bbcfed51f812...$59.00PATAGONIA Women's Los Gatos Fleece 1/4-Zip Pul...STops[{'name': 'White', 'rgb': '#FFFFFF', 'message_...PoshmarkFalse...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
3PatagoniaGirl’s Patagonia Los Gatos Fleece 1/4 Zip XShttps://poshmark.com/listing/63f4f459c5df6c7f8...$30.00Girl’s Patagonia Los Gatos 1/4 Zip Fleece\\n\\n-...XSGOther[{'name': 'Tan', 'rgb': '#d1b48e', 'message_id...PoshmarkFalse...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4PatagoniaPatagonia Los Gatos Quarter Zip Greyhttps://poshmark.com/listing/622cc43d3a0db900b...$59.00Patagonia Los Gatos Quarter Zip Grey \\nWomen’s...MTops[{'name': 'Gray', 'rgb': '#929292', 'message_i...PoshmarkFalse...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "

5 rows × 48 columns

\n", "
" ], "text/plain": [ " brand title \\\n", "0 Patagonia Patagonia Women's Los Gatos Fleece 1/4-Zip Smo... \n", "1 Patagonia Patagonia Los Gatos 1/4 Zip Pullover M Beech B... \n", "2 Patagonia PATAGONIA Women's Los Gatos Fleece 1/4-Zip Pul... \n", "3 Patagonia Girl’s Patagonia Los Gatos Fleece 1/4 Zip XS \n", "4 Patagonia Patagonia Los Gatos Quarter Zip Grey \n", "\n", " product_url price \\\n", "0 https://poshmark.com/listing/63d4821f2fbf1afe8... $36.00 \n", "1 https://poshmark.com/listing/63fcd7709f212bd48... $59.00 \n", "2 https://poshmark.com/listing/642b9bbcfed51f812... $59.00 \n", "3 https://poshmark.com/listing/63f4f459c5df6c7f8... $30.00 \n", "4 https://poshmark.com/listing/622cc43d3a0db900b... $59.00 \n", "\n", " description size category \\\n", "0 A soft, warm and versatile quarter-zip pullove... M Tops \n", "1 High pile, quarter zip pulllover\\nMeasurements... M Tops \n", "2 PATAGONIA Women's Los Gatos Fleece 1/4-Zip Pul... S Tops \n", "3 Girl’s Patagonia Los Gatos 1/4 Zip Fleece\\n\\n-... XSG Other \n", "4 Patagonia Los Gatos Quarter Zip Grey \\nWomen’s... M Tops \n", "\n", " colors Poshmark Unnamed: 9 \\\n", "0 [{'name': 'Gray', 'rgb': '#929292', 'message_i... Poshmark False \n", "1 [{'name': 'Brown', 'rgb': '#663509', 'message_... Poshmark False \n", "2 [{'name': 'White', 'rgb': '#FFFFFF', 'message_... Poshmark False \n", "3 [{'name': 'Tan', 'rgb': '#d1b48e', 'message_id... Poshmark False \n", "4 [{'name': 'Gray', 'rgb': '#929292', 'message_i... Poshmark False \n", "\n", " ... Unnamed: 38 Unnamed: 39 Unnamed: 40 Unnamed: 41 Unnamed: 42 \\\n", "0 ... NaN NaN NaN NaN NaN \n", "1 ... NaN NaN NaN NaN NaN \n", "2 ... NaN NaN NaN NaN NaN \n", "3 ... NaN NaN NaN NaN NaN \n", "4 ... NaN NaN NaN NaN NaN \n", "\n", " Unnamed: 43 Unnamed: 44 Unnamed: 45 Unnamed: 46 Unnamed: 47 \n", "0 NaN NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN NaN \n", "2 NaN NaN NaN NaN NaN \n", "3 NaN NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN NaN \n", "\n", "[5 rows x 48 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "patagonia_df.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#download_images(patagonia_df, data_path)" ] }, { "cell_type": "code", "execution_count": 56, "metadata": { "tags": [] }, "outputs": [], "source": [ "load_models()" ] }, { "cell_type": "code", "execution_count": 54, "metadata": { "tags": [] }, "outputs": [], "source": [ "def generate_embeddings():\n", " embeddings_df = pd.DataFrame()\n", "\n", " # Get image embeddings\n", " with torch.no_grad():\n", " for fp in os.listdir(data_path + 'images/'):\n", " if fp.endswith('.jpg'):\n", " new_row = {'name': fp}\n", " for model_name in model_name_to_ids.keys():\n", " new_row[f'{model_name}-embedding'] = get_image_embeddings(model_name, Image.open(data_path + 'images/' + fp))\n", " embeddings_df = embeddings_df.append(new_row, ignore_index=True)\n", " return embeddings_df" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "tags": [] }, "outputs": [], "source": [ "fp = os.listdir(data_path + 'images/')[0]" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "tags": [] }, "outputs": [], "source": [ "model_name = 'fashion'" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "tags": [] }, "outputs": [], "source": [ "new_row = {'name': fp, f'{model_name}-embedding': get_image_embeddings(model_name, Image.open(data_path + 'images/' + fp))}\n", " " ] }, { "cell_type": "code", "execution_count": 57, "metadata": { "tags": [] }, "outputs": [], "source": [ "embeddings_df = generate_embeddings()" ] }, { "cell_type": "code", "execution_count": 58, "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0Women's Under Armour Hustle Fleece Hoodie pull...[1.0734258, 0.99022365, 0.32032806, 0.2895219,...[0.23177437, -1.9268938, 0.273342, -0.02474568...[-0.32902592, -0.09434131, 0.3055967, 0.229937...
1Patagonia Los Gatos Fleece Grey Pullover.jpg[0.6227796, 0.026531212, 0.45240527, -0.488214...[0.38133767, -1.3040155, 1.1697398, -0.3085520...[-0.1695469, 0.5067289, 0.31120676, -0.0083701...
2REI Women's Down With It Quilted Hooded Parka ...[0.8497103, 1.2925782, -0.21685322, 0.24116844...[-0.30043703, -1.3144073, -0.33848628, 0.24008...[-0.24841668, 0.4876942, 0.39810008, -0.141552...
3Chanel Haute Couture Navy Blue Dress Semi Shee...[0.536018, 0.60787296, -0.2751825, 1.0325747, ...[-0.101031125, 0.033914, -0.44531134, -0.64656...[-0.08328074, 0.19443086, 0.14361368, 0.259305...
4Patagonia Women’s S Los Gatos Quarter-Zip Flee...[0.79398394, 1.3899276, -0.21383175, 0.0109823...[0.60070944, -1.1051046, 1.0572466, 0.47092092...[-0.27894062, -0.09589732, 0.5556799, -0.13458...
326Women's REI Elements Jacket Size M.jpg[0.6310029, 0.9942212, 0.009293936, 0.7862729,...[0.19858713, -1.8665266, -0.3323754, 0.0465058...[-0.0952643, 0.8016211, 0.08129032, 0.15187423...
327CHANEL Black cotton bodycon tank dress with zi...[1.0761135, 0.18927886, -0.007131472, 0.625682...[0.07516122, -0.1886161, 0.1334078, -0.2829321...[-0.12297699, 0.026368856, 0.04415588, 0.26031...
328Reformation X Veda Women's Bad Leather Jacket ...[0.79690784, 1.2895226, 0.22802149, -0.2736021...[-0.12224964, -0.38734418, 0.35824925, 0.95855...[0.6507246, 0.27751687, 0.36114892, -0.0831387...
329DISNEY HER UNIVERSE LILO AND STICH Rainbow Qua...[1.1617887, 0.19193622, 0.046035454, 0.4334900...[-0.20762922, 0.1754938, -0.7334341, -0.106492...[-0.31946087, 0.19534132, 0.37351555, -0.09741...
330PATAGONIA Nano Puff Jacket Zip Primaloft Insul...[0.2912089, 0.72192264, -0.01620815, 0.0022971...[0.0026952028, -1.6660439, 0.03839147, -0.2164...[0.12799336, 0.75828236, 0.10943861, -0.036647...
\n", "

331 rows × 4 columns

\n", "
" ], "text/plain": [ " name \\\n", "0 Women's Under Armour Hustle Fleece Hoodie pull... \n", "1 Patagonia Los Gatos Fleece Grey Pullover.jpg \n", "2 REI Women's Down With It Quilted Hooded Parka ... \n", "3 Chanel Haute Couture Navy Blue Dress Semi Shee... \n", "4 Patagonia Women’s S Los Gatos Quarter-Zip Flee... \n", ".. ... \n", "326 Women's REI Elements Jacket Size M.jpg \n", "327 CHANEL Black cotton bodycon tank dress with zi... \n", "328 Reformation X Veda Women's Bad Leather Jacket ... \n", "329 DISNEY HER UNIVERSE LILO AND STICH Rainbow Qua... \n", "330 PATAGONIA Nano Puff Jacket Zip Primaloft Insul... \n", "\n", " sentence-transformer-clip-ViT-L-14-embedding \\\n", "0 [1.0734258, 0.99022365, 0.32032806, 0.2895219,... \n", "1 [0.6227796, 0.026531212, 0.45240527, -0.488214... \n", "2 [0.8497103, 1.2925782, -0.21685322, 0.24116844... \n", "3 [0.536018, 0.60787296, -0.2751825, 1.0325747, ... \n", "4 [0.79398394, 1.3899276, -0.21383175, 0.0109823... \n", ".. ... \n", "326 [0.6310029, 0.9942212, 0.009293936, 0.7862729,... \n", "327 [1.0761135, 0.18927886, -0.007131472, 0.625682... \n", "328 [0.79690784, 1.2895226, 0.22802149, -0.2736021... \n", "329 [1.1617887, 0.19193622, 0.046035454, 0.4334900... \n", "330 [0.2912089, 0.72192264, -0.01620815, 0.0022971... \n", "\n", " fashion-embedding \\\n", "0 [0.23177437, -1.9268938, 0.273342, -0.02474568... \n", "1 [0.38133767, -1.3040155, 1.1697398, -0.3085520... \n", "2 [-0.30043703, -1.3144073, -0.33848628, 0.24008... \n", "3 [-0.101031125, 0.033914, -0.44531134, -0.64656... \n", "4 [0.60070944, -1.1051046, 1.0572466, 0.47092092... \n", ".. ... \n", "326 [0.19858713, -1.8665266, -0.3323754, 0.0465058... \n", "327 [0.07516122, -0.1886161, 0.1334078, -0.2829321... \n", "328 [-0.12224964, -0.38734418, 0.35824925, 0.95855... \n", "329 [-0.20762922, 0.1754938, -0.7334341, -0.106492... \n", "330 [0.0026952028, -1.6660439, 0.03839147, -0.2164... \n", "\n", " openai-clip-embedding \n", "0 [-0.32902592, -0.09434131, 0.3055967, 0.229937... \n", "1 [-0.1695469, 0.5067289, 0.31120676, -0.0083701... \n", "2 [-0.24841668, 0.4876942, 0.39810008, -0.141552... \n", "3 [-0.08328074, 0.19443086, 0.14361368, 0.259305... \n", "4 [-0.27894062, -0.09589732, 0.5556799, -0.13458... \n", ".. ... \n", "326 [-0.0952643, 0.8016211, 0.08129032, 0.15187423... \n", "327 [-0.12297699, 0.026368856, 0.04415588, 0.26031... \n", "328 [0.6507246, 0.27751687, 0.36114892, -0.0831387... \n", "329 [-0.31946087, 0.19534132, 0.37351555, -0.09741... \n", "330 [0.12799336, 0.75828236, 0.10943861, -0.036647... \n", "\n", "[331 rows x 4 columns]" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ "embeddings_df" ] }, { "cell_type": "code", "execution_count": 65, "metadata": { "tags": [] }, "outputs": [], "source": [ "embeddings_path = os.path.join(data_path, 'metadata/patagonia_losGatos_embeddings.pq')\n", "embeddings_df.to_parquet(embeddings_path)" ] }, { "cell_type": "code", "execution_count": 66, "metadata": { "tags": [] }, "outputs": [], "source": [ "embeddings_df = pd.read_parquet(embeddings_path)" ] }, { "cell_type": "code", "execution_count": 67, "metadata": { "tags": [] }, "outputs": [], "source": [ "for i, row in embeddings_df.iterrows():\n", " if '\\n' in row['name']:\n", " print(row['name'])\n", " embeddings_df = embeddings_df.drop(i)" ] }, { "cell_type": "code", "execution_count": 68, "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0Women's Under Armour Hustle Fleece Hoodie pull...[1.0734258, 0.99022365, 0.32032806, 0.2895219,...[0.23177437, -1.9268938, 0.273342, -0.02474568...[-0.32902592, -0.09434131, 0.3055967, 0.229937...
1Patagonia Los Gatos Fleece Grey Pullover.jpg[0.6227796, 0.026531212, 0.45240527, -0.488214...[0.38133767, -1.3040155, 1.1697398, -0.3085520...[-0.1695469, 0.5067289, 0.31120676, -0.0083701...
2REI Women's Down With It Quilted Hooded Parka ...[0.8497103, 1.2925782, -0.21685322, 0.24116844...[-0.30043703, -1.3144073, -0.33848628, 0.24008...[-0.24841668, 0.4876942, 0.39810008, -0.141552...
3Chanel Haute Couture Navy Blue Dress Semi Shee...[0.536018, 0.60787296, -0.2751825, 1.0325747, ...[-0.101031125, 0.033914, -0.44531134, -0.64656...[-0.08328074, 0.19443086, 0.14361368, 0.259305...
4Patagonia Women’s S Los Gatos Quarter-Zip Flee...[0.79398394, 1.3899276, -0.21383175, 0.0109823...[0.60070944, -1.1051046, 1.0572466, 0.47092092...[-0.27894062, -0.09589732, 0.5556799, -0.13458...
326Women's REI Elements Jacket Size M.jpg[0.6310029, 0.9942212, 0.009293936, 0.7862729,...[0.19858713, -1.8665266, -0.3323754, 0.0465058...[-0.0952643, 0.8016211, 0.08129032, 0.15187423...
327CHANEL Black cotton bodycon tank dress with zi...[1.0761135, 0.18927886, -0.007131472, 0.625682...[0.07516122, -0.1886161, 0.1334078, -0.2829321...[-0.12297699, 0.026368856, 0.04415588, 0.26031...
328Reformation X Veda Women's Bad Leather Jacket ...[0.79690784, 1.2895226, 0.22802149, -0.2736021...[-0.12224964, -0.38734418, 0.35824925, 0.95855...[0.6507246, 0.27751687, 0.36114892, -0.0831387...
329DISNEY HER UNIVERSE LILO AND STICH Rainbow Qua...[1.1617887, 0.19193622, 0.046035454, 0.4334900...[-0.20762922, 0.1754938, -0.7334341, -0.106492...[-0.31946087, 0.19534132, 0.37351555, -0.09741...
330PATAGONIA Nano Puff Jacket Zip Primaloft Insul...[0.2912089, 0.72192264, -0.01620815, 0.0022971...[0.0026952028, -1.6660439, 0.03839147, -0.2164...[0.12799336, 0.75828236, 0.10943861, -0.036647...
\n", "

331 rows × 4 columns

\n", "
" ], "text/plain": [ " name \\\n", "0 Women's Under Armour Hustle Fleece Hoodie pull... \n", "1 Patagonia Los Gatos Fleece Grey Pullover.jpg \n", "2 REI Women's Down With It Quilted Hooded Parka ... \n", "3 Chanel Haute Couture Navy Blue Dress Semi Shee... \n", "4 Patagonia Women’s S Los Gatos Quarter-Zip Flee... \n", ".. ... \n", "326 Women's REI Elements Jacket Size M.jpg \n", "327 CHANEL Black cotton bodycon tank dress with zi... \n", "328 Reformation X Veda Women's Bad Leather Jacket ... \n", "329 DISNEY HER UNIVERSE LILO AND STICH Rainbow Qua... \n", "330 PATAGONIA Nano Puff Jacket Zip Primaloft Insul... \n", "\n", " sentence-transformer-clip-ViT-L-14-embedding \\\n", "0 [1.0734258, 0.99022365, 0.32032806, 0.2895219,... \n", "1 [0.6227796, 0.026531212, 0.45240527, -0.488214... \n", "2 [0.8497103, 1.2925782, -0.21685322, 0.24116844... \n", "3 [0.536018, 0.60787296, -0.2751825, 1.0325747, ... \n", "4 [0.79398394, 1.3899276, -0.21383175, 0.0109823... \n", ".. ... \n", "326 [0.6310029, 0.9942212, 0.009293936, 0.7862729,... \n", "327 [1.0761135, 0.18927886, -0.007131472, 0.625682... \n", "328 [0.79690784, 1.2895226, 0.22802149, -0.2736021... \n", "329 [1.1617887, 0.19193622, 0.046035454, 0.4334900... \n", "330 [0.2912089, 0.72192264, -0.01620815, 0.0022971... \n", "\n", " fashion-embedding \\\n", "0 [0.23177437, -1.9268938, 0.273342, -0.02474568... \n", "1 [0.38133767, -1.3040155, 1.1697398, -0.3085520... \n", "2 [-0.30043703, -1.3144073, -0.33848628, 0.24008... \n", "3 [-0.101031125, 0.033914, -0.44531134, -0.64656... \n", "4 [0.60070944, -1.1051046, 1.0572466, 0.47092092... \n", ".. ... \n", "326 [0.19858713, -1.8665266, -0.3323754, 0.0465058... \n", "327 [0.07516122, -0.1886161, 0.1334078, -0.2829321... \n", "328 [-0.12224964, -0.38734418, 0.35824925, 0.95855... \n", "329 [-0.20762922, 0.1754938, -0.7334341, -0.106492... \n", "330 [0.0026952028, -1.6660439, 0.03839147, -0.2164... \n", "\n", " openai-clip-embedding \n", "0 [-0.32902592, -0.09434131, 0.3055967, 0.229937... \n", "1 [-0.1695469, 0.5067289, 0.31120676, -0.0083701... \n", "2 [-0.24841668, 0.4876942, 0.39810008, -0.141552... \n", "3 [-0.08328074, 0.19443086, 0.14361368, 0.259305... \n", "4 [-0.27894062, -0.09589732, 0.5556799, -0.13458... \n", ".. ... \n", "326 [-0.0952643, 0.8016211, 0.08129032, 0.15187423... \n", "327 [-0.12297699, 0.026368856, 0.04415588, 0.26031... \n", "328 [0.6507246, 0.27751687, 0.36114892, -0.0831387... \n", "329 [-0.31946087, 0.19534132, 0.37351555, -0.09741... \n", "330 [0.12799336, 0.75828236, 0.10943861, -0.036647... \n", "\n", "[331 rows x 4 columns]" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ "embeddings_df" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "for fp in os.listdir(data_path + 'images/'):\n", " if '?' in fp:\n", " print(fp)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "2" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "1+1" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "tags": [] }, "outputs": [], "source": [ "%reload_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "tags": [] }, "outputs": [], "source": [ "df.to_csv('random.tsv', sep='\\t')" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "disco-io/data\n" ] } ], "source": [ "import utils\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "tags": [] }, "outputs": [], "source": [ "from utils import get_immediate_subdirectories" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "disco-io/data\n", "Refreshing all datasets: ['test']\n" ] } ], "source": [ "utils.refresh_all_datasets()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "'test'" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "utils.cur_dataset" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "disco-io/data\n" ] }, { "data": { "text/plain": [ "['test']" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "get_immediate_subdirectories('data')\n" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "import utils" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "from utils import fs" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "s3_path = 'data'" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "s3_full_path = f\"{utils.S3_BUCKET}/{s3_path}\"" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['disco-io/data/Cvlsntdjgrnuyrlf.jpg', 'disco-io/data/test']" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fs.glob(f\"{s3_full_path}/*\")" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fs.isdir('disco-io/data/test')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.0" }, "vscode": { "interpreter": { "hash": "e85fcd8d0dbb45c39d3e544566c77318961c8114425a16ff4cb5c14067743b34" } } }, "nbformat": 4, "nbformat_minor": 4 }