{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "fe7acfaf-dc61-4211-9c78-8e4433bc9deb",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import pickle\n",
"import random\n",
"\n",
"import numpy as np\n",
"import torch\n",
"#import ninja\n",
"from backend.disentangle_concepts import *\n",
"import dnnlib \n",
"import legacy\n",
"\n",
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
"# Locations of the seed annotations pickle and of the concept vocabulary file.\n",
"annotations_file = './data/annotated_files/seeds0000-50000.pkl'\n",
"concepts = './data/concepts.txt'\n",
"\n",
"# NOTE: pickle.load can execute arbitrary code, so only point this at trusted files.\n",
"with open(annotations_file, 'rb') as annotations_handle:\n",
"    annotations = pickle.load(annotations_handle)\n",
"\n",
"# Annotation table read from the companion CSV.\n",
"ann_df = pd.read_csv('./data/annotated_files/sim_seeds0000-50000.csv')\n",
"\n",
"# One label per line of the concepts file, with surrounding whitespace stripped.\n",
"with open(concepts) as concepts_handle:\n",
"    labels = [raw_line.strip() for raw_line in concepts_handle]\n",
"\n",
"# Load the network snapshot and keep its 'G_ema' entry on the CPU.\n",
"with dnnlib.util.open_url('./data/model_files/network-snapshot-010600.pkl') as snapshot_handle:\n",
"    model = legacy.load_network_pkl(snapshot_handle)['G_ema'].to('cpu')  # type: ignore\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ecba0e89",
"metadata": {},
"outputs": [],
"source": [
"# Call get_separation_space for a single concept, using the 'LR' method in the 'W' latent space.\n",
"concept_id = 'Abstract'\n",
"(\n",
"    separation_vector,\n",
"    number_important_features,\n",
"    important_nodes,\n",
"    accuracy_sep,\n",
") = get_separation_space(\n",
"    concept_id,\n",
"    annotations,\n",
"    ann_df,\n",
"    method='LR',\n",
"    samples=150,\n",
"    C=0.1,\n",
"    latent_space='W',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f7475624",
"metadata": {},
"outputs": [],
"source": [
"separation_vector.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bf4b31c0",
"metadata": {},
"outputs": [],
"source": [
"# get_verification_score(concept_id, separation_vector, model, annotations)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ac437ea3",
"metadata": {},
"outputs": [],
"source": [
"print(number_important_features, important_nodes)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c6a63345",
"metadata": {},
"outputs": [],
"source": [
"seed = random.randint(0,10000)\n",
"original_image_vec = annotations['w_vectors'][seed]\n",
"img = generate_original_image(original_image_vec, model, latent_space='W')\n",
"img"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "09f13e6a",
"metadata": {},
"outputs": [],
"source": [
"# Call regenerate_images for the current vector and separation direction, with the\n",
"# epsilon range [-10, 10] in the 'W' latent space.\n",
"images, lambdas = regenerate_images(\n",
"    model,\n",
"    original_image_vec,\n",
"    separation_vector,\n",
"    min_epsilon=-10,\n",
"    max_epsilon=10,\n",
"    latent_space='W',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1cf9f95d",
"metadata": {},
"outputs": [],
"source": [
"images[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "48834be4",
"metadata": {},
"outputs": [],
"source": [
"images[-1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f5390d8f",
"metadata": {},
"outputs": [],
"source": [
"import open_clip\n",
"import os\n",
"import random\n",
"from tqdm import tqdm\n",
"# Hide all CUDA devices so everything below runs on the CPU.\n",
"os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"\"\n",
"\n",
"model_clip, _, preprocess = open_clip.create_model_and_transforms('ViT-L-14', pretrained='laion2b_s32b_b82k')\n",
"tokenizer = open_clip.get_tokenizer('ViT-L-14')\n",
"\n",
"# One text prompt per concept label, each prefixed with the same context string.\n",
"pre_prompt = \"Artwork, \" #@param {type:\"string\"}\n",
"text_descriptions = [f\"{pre_prompt}{label}\" for label in labels]\n",
"text_tokens = tokenizer(text_descriptions)\n",
"\n",
"# CUDA is hidden above, so a CUDA autocast context would only emit a warning and disable\n",
"# itself; no_grad alone gives the same CPU result.\n",
"with torch.no_grad():\n",
"    text_features = model_clip.encode_text(text_tokens).float()\n",
"    # Divide each embedding by its norm along the last dimension (unit-length rows).\n",
"    text_features /= text_features.norm(dim=-1, keepdim=True)\n",
"\n",
"text_features = text_features.cpu().numpy()\n",
"print(text_features.shape)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0eae840f",
"metadata": {},
"outputs": [],
"source": [
"import open_clip\n",
"import os\n",
"import random\n",
"from tqdm import tqdm\n",
"os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4d776015",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import torch"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e3f917a7",
"metadata": {},
"outputs": [],
"source": [
"model_clip, _, preprocess = open_clip.create_model_and_transforms('ViT-L-14', pretrained='laion2b_s32b_b82k')\n",
"tokenizer = open_clip.get_tokenizer('ViT-L-14')\n",
"\n",
"# Build one label per annotation column, prefixed with the attribute type it came from.\n",
"labels = []\n",
"for type_col in ['Provenance', 'Shape Name', 'Fabric', 'Technique']:\n",
"    # Use a dedicated name here: rebinding ann_df would replace the annotation table that\n",
"    # later cells index by concept name (e.g. ann_df[concept_id], ann_df['Abstract']).\n",
"    vase_ann_df = pd.read_csv(f'./data/vase_annotated_files/sim_{type_col}_seeds0000-20000.csv').drop(['Unnamed: 0', 'ID'], axis=1)\n",
"    labels += [type_col + ' ' + lbl for lbl in vase_ann_df.columns]\n",
"\n",
"print(labels)\n",
"\n",
"# One text prompt per label, each prefixed with the same context string.\n",
"pre_prompt = \"Vase art, \" #@param {type:\"string\"}\n",
"text_descriptions = [f\"{pre_prompt}{label}\" for label in labels]\n",
"text_tokens = tokenizer(text_descriptions)\n",
"\n",
"# An earlier cell sets CUDA_VISIBLE_DEVICES to '', so a CUDA autocast context would only\n",
"# emit a warning and disable itself; no_grad alone gives the same CPU result.\n",
"with torch.no_grad():\n",
"    text_features = model_clip.encode_text(text_tokens).float()\n",
"    # Divide each embedding by its norm along the last dimension (unit-length rows).\n",
"    text_features /= text_features.norm(dim=-1, keepdim=True)\n",
"\n",
"text_features = text_features.cpu().numpy()\n",
"print(text_features.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f7858bbf",
"metadata": {},
"outputs": [],
"source": [
"# Map each label to its text embedding (pairs are matched by position).\n",
"dic_clip_vecs = dict(zip(labels, text_features))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "de6bd428",
"metadata": {},
"outputs": [],
"source": [
"import pickle"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d19c8e4c",
"metadata": {},
"outputs": [],
"source": [
"dic_clip_vecs['Provenance ADRIA'].shape\n",
"with open('data/CLIP_vecs_vases.pkl', 'wb') as f:\n",
" pickle.dump(dic_clip_vecs, f)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "89b4a6fc",
"metadata": {},
"outputs": [],
"source": [
"dic_clip_vecs['Abstract'].shape\n",
"with open('data/CLIP_vecs.pkl', 'wb') as f:\n",
" pickle.dump(dic_clip_vecs, f)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3ac84c8c-e916-4719-a7cd-610cea768d7c",
"metadata": {},
"outputs": [],
"source": [
"with open(annotations_file, 'rb') as f:\n",
" annotations = pickle.load(f)\n",
"annotations['fname'][:10], annotations['z_vectors'][:1],# annotations['annotations'][:5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5bcfdb5c-0ab9-462f-98ce-78bddbb96710",
"metadata": {},
"outputs": [],
"source": [
"seed = random.randint(0,10000)\n",
"original_image_vec = annotations['z_vectors'][seed]\n",
"img = generate_original_image(original_image_vec, model)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f8916281",
"metadata": {},
"outputs": [],
"source": [
"concept_id = \"Readymade\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "55b47c11-9f42-4ff0-9098-f32b1e826ccb",
"metadata": {},
"outputs": [],
"source": [
"# Rank all rows by their value in the selected concept column (ascending order).\n",
"samples = 500\n",
"abstracts = np.array([float(score) for score in ann_df[concept_id]])\n",
"ranking = list(np.argsort(abstracts))\n",
"\n",
"# The first `samples` positions hold the lowest values, the last `samples` the highest.\n",
"repr_idxs = ranking[:samples]\n",
"abstract_idxs = ranking[-samples:]\n",
"\n",
"z = annotations['z_vectors'][abstract_idxs[0]]\n",
"img = generate_original_image(z, model)\n",
"\n",
"# Drop the ID column, when present, before listing the row's values.\n",
"if 'ID' in ann_df.columns:\n",
"    ann_df = ann_df.drop('ID', axis=1)\n",
"selected_row = ann_df.loc[abstract_idxs[0], :]\n",
"print(selected_row.sort_values()[-20:])\n",
"img"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fb7ca184",
"metadata": {},
"outputs": [],
"source": [
"ann_df['Abstract'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a1a4d9e3-656c-41c1-a433-1c1a83fa33de",
"metadata": {},
"outputs": [],
"source": [
"z = annotations['z_vectors'][repr_idxs[0]]\n",
"img = generate_original_image(z, model)\n",
"print(ann_df.loc[repr_idxs[0], :].sort_values())\n",
"img"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "65218a37-76e6-4df6-929e-fc672ff5f37b",
"metadata": {},
"outputs": [],
"source": [
"# List every column whose name contains the character '1'.\n",
"for matching_name in (name for name in ann_df.columns if '1' in name):\n",
"    print(matching_name)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "72fa8929-40a0-4376-97d2-5d6ac4a4eda6",
"metadata": {},
"outputs": [],
"source": [
"# Concepts to compare; get_concepts_vectors is called with method='LR', samples=150, C=0.1.\n",
"concepts = [\n",
"    'Op Art',\n",
"    'Minimalism',\n",
"    'Surrealism',\n",
"    'Baroque',\n",
"    'Lithography',\n",
"    'Woodcut',\n",
"    'Etching',\n",
"    'Intaglio',\n",
"]\n",
"\n",
"vectors, nodes_in_common = get_concepts_vectors(\n",
"    concepts,\n",
"    annotations,\n",
"    ann_df,\n",
"    method='LR',\n",
"    samples=150,\n",
"    C=0.1,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "43c5fdd8",
"metadata": {},
"outputs": [],
"source": [
"print(nodes_in_common)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0dbf83d5-e33e-4401-8541-1dbd0aa4b10a",
"metadata": {},
"outputs": [],
"source": [
"# import plotly.graph_objects as go\n",
"\n",
"# # Generate example data\n",
"# x = projection[:, 0] # x-coordinates of vectors\n",
"# y = projection[:, 1] # y-coordinates of vectors\n",
"# z = projection[:, 2] # z-coordinates of vectors\n",
"# data = []\n",
"\n",
"# for i in range(len(x)):\n",
"# # Create a 3D scatter plot\n",
"# data.append(go.Scatter3d(\n",
"# x=[0, x[i]], # Start at x = 0\n",
"# y=[0, y[i]], # Start at y = 0\n",
"# z=[0, z[i]], # Start at z = 0\n",
"# mode='lines',\n",
"# marker=dict(size=5),\n",
"# line=dict(width=2),\n",
"# hovertext=concepts[i],\n",
"# text=concepts[i],\n",
"# name=concepts[i],\n",
"# showlegend=True,\n",
"# ))\n",
"\n",
"# # layout = go.Layout(margin = dict( l = 0,\n",
"# # r = 0,\n",
"# # b = 0,\n",
"# # t = 0)\n",
"# # )\n",
"# fig = go.Figure(data=data)\n",
"# # plot(fig,filename=\"vector.html\",auto_open=False,image='png',image_height=800,image_width=1500)\n",
"# # # Set the title\n",
"# fig.update_layout(title='3D Vectors')\n",
"# # Set the axis labels\n",
"# fig.update_layout(scene=dict(\n",
"# xaxis=dict(title='X', range=[-10, 10],),\n",
"# yaxis=dict(title='Y', range=[-10, 10],),\n",
"# zaxis=dict(title='Z', range=[-10, 10],),\n",
"# ))\n",
"\n",
"# # Show the plot\n",
"# fig.show()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "30a395c4-9d42-4790-af08-62b474eb1df3",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics.pairwise import cosine_similarity\n",
"\n",
"# Collect (concept_a, concept_b, similarity) for every ordered pair of distinct concepts.\n",
"edges = []\n",
"for i, concept_a in enumerate(concepts):\n",
"    for j, concept_b in enumerate(concepts):\n",
"        if i == j:\n",
"            continue\n",
"        print(f'Similarity between {concept_a} and {concept_b}')\n",
"        # cosine_similarity expects 2-D inputs, hence the reshape of each row to (1, -1).\n",
"        similarity = cosine_similarity(vectors[i, :].reshape(1, -1), vectors[j, :].reshape(1, -1))\n",
"        rounded = np.round(similarity[0][0], 3)\n",
"        print(rounded)\n",
"        edges.append((concept_a, concept_b, rounded))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "16e81bd8",
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import networkx as nx"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "01d31c45",
"metadata": {},
"outputs": [],
"source": [
"# Build an undirected graph from the (node, node, weight) triples in `edges`.\n",
"G = nx.Graph()\n",
"G.add_weighted_edges_from(edges)\n",
"\n",
"# Position nodes with the spring layout, then draw nodes, labels and edges.\n",
"pos = nx.spring_layout(G)\n",
"nx.draw_networkx(G, pos=pos, with_labels=True, node_color='lightblue', node_size=500, font_size=12)\n",
"\n",
"# Label every edge with its 'weight' attribute.\n",
"nx.draw_networkx_edge_labels(G, pos=pos, edge_labels=nx.get_edge_attributes(G, 'weight'))\n",
"\n",
"# Hide the axes and render the figure.\n",
"plt.axis('off')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d8fd3469",
"metadata": {},
"outputs": [],
"source": [
"import nxviz as nv\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6f0925e9",
"metadata": {},
"outputs": [],
"source": [
"from pyvis.network import Network"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f89bd374",
"metadata": {},
"outputs": [],
"source": [
"# Interactive pyvis network: one node per concept, one edge per entry of `edges`.\n",
"net = Network(notebook=True, height=\"750px\", width=\"100%\",)\n",
"for src, dst, w in edges:\n",
"    # Node id and label are both the concept name; title is the hover text.\n",
"    for concept in (src, dst):\n",
"        net.add_node(concept, concept, title=concept)\n",
"    net.add_edge(src, dst, value=w, title=src + ' to ' + dst + ' similarity ' + str(w))\n",
"print(net)\n",
"\n",
"# neighbor_map = net.get_adj_list()\n",
"\n",
"# # add neighbor data to node hover data\n",
"# for node in net.nodes:\n",
"# node[\"title\"] += \" Neighbors:<br>\" + \"<br>\".join(neighbor_map[node[\"id\"]])\n",
"# node[\"value\"] = len(neighbor_map[node[\"id\"]])\n",
"\n",
"\n",
"# net.toggle_physics(True)\n",
"net.show('nx.html')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fce19b9e",
"metadata": {},
"outputs": [],
"source": [
"# seed = random.randint(0,10000)\n",
"# original_image_vec = annotations['z_vectors'][seed]\n",
"# img = generate_original_image(original_image_vec, model)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "184687a6",
"metadata": {},
"outputs": [],
"source": [
"# z_0 = original_image_vec + 2 * vectors[0] + 2 * vectors[1]\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "52bc2a0d",
"metadata": {},
"outputs": [],
"source": [
"# img = model(z_0.to(), None, truncation_psi=0.7, noise_mode='const')\n",
"# img = (img.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5afdc77f",
"metadata": {},
"outputs": [],
"source": [
"with dnnlib.util.open_url('./data/model_files/network-snapshot-010600.pkl') as f:\n",
" G = legacy.load_network_pkl(f)['G_ema'].to('cpu') # type: ignore"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ad9865af",
"metadata": {},
"outputs": [],
"source": [
"print(G)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3c771ba9",
"metadata": {},
"outputs": [],
"source": [
"print(G.mapping)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eec0534d",
"metadata": {},
"outputs": [],
"source": [
"W = G.mapping(z, None, truncation_psi=0.7)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "97057ae3",
"metadata": {},
"outputs": [],
"source": [
"print(G.mapping(z, None, truncation_psi=1).shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "15cbbc5a",
"metadata": {},
"outputs": [],
"source": [
"print(G.synthesis(W, noise_mode='const').shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f02410a2",
"metadata": {},
"outputs": [],
"source": [
"seed = random.randint(0, 1000000)\n",
"W_0 = torch.from_numpy(np.random.RandomState(seed).randn(1, G.z_dim)).to('cpu')\n",
"W = W_0.repeat([1, G.num_ws, 1])\n",
"img = G.synthesis(W, noise_mode='random')\n",
"img = (img.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8)\n",
"im = PIL.Image.fromarray(img[0].cpu().numpy(), 'RGB')\n",
"im.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "046ffb56",
"metadata": {},
"outputs": [],
"source": [
"z = torch.from_numpy(np.random.RandomState(seed).randn(1, G.z_dim)).to('cpu')\n",
"W = G.mapping(z, None, truncation_psi=1)\n",
"print(W[0, 0] == W[0, 1])\n",
"img = G.synthesis(W, noise_mode='random')\n",
"img = (img.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8)\n",
"im = PIL.Image.fromarray(img[0].cpu().numpy(), 'RGB')\n",
"im.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9ea5676d",
"metadata": {},
"outputs": [],
"source": [
"W[:,0,:,].shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a0687e87",
"metadata": {},
"outputs": [],
"source": [
"z.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a01b9c7f",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"# Directory that receives the rendered images and the pickle of latent vectors.\n",
"out_dir = 'data/images'\n",
"# Saving raises FileNotFoundError when the directory is missing, so create it up front.\n",
"os.makedirs(out_dir, exist_ok=True)\n",
"\n",
"z_vals = []\n",
"w_vals = []\n",
"fnames = []\n",
"# Generate images.\n",
"seeds = range(100)\n",
"for seed_idx, seed in enumerate(seeds):\n",
"    print('Generating image for seed %d (%d/%d) ...' % (seed, seed_idx, len(seeds)))\n",
"    # Build the output path once and reuse it for both the record and the save call.\n",
"    fname = f'{out_dir}/seed{seed:04d}.png'\n",
"    fnames.append(fname)\n",
"\n",
"    # Seeding RandomState with the loop's seed makes each z reproducible.\n",
"    z = torch.from_numpy(np.random.RandomState(seed).randn(1, G.z_dim))\n",
"    z_vals.append(z)\n",
"    z = z.to('cpu')\n",
"\n",
"    W = G.mapping(z, None, truncation_psi=1)\n",
"    # Record the slice of W at index 0 along dim 1 for this seed.\n",
"    w_vals.append(W[:,0,:])\n",
"    img = G.synthesis(W, noise_mode='const')\n",
"\n",
"    # Move dim 1 to the last position, rescale with *127.5 + 128, clamp to [0, 255], cast to uint8.\n",
"    img = (img.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8)\n",
"    # NOTE(review): PIL is not imported explicitly in the cells shown; presumably it comes in\n",
"    # via the star import from backend.disentangle_concepts - confirm.\n",
"    PIL.Image.fromarray(img[0].cpu().numpy(), 'RGB').save(fname)\n",
"\n",
"# Store file names together with the z vectors and W slices that produced them.\n",
"info = {'fname': fnames, 'z_vectors': z_vals, 'w_vectors': w_vals}\n",
"with open(f'{out_dir}/seeds{seeds[0]:04d}-{seeds[-1]:04d}.pkl', 'wb') as f:\n",
"    pickle.dump(info, f)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2e44701e",
"metadata": {},
"outputs": [],
"source": [
"# import open_clip\n",
"# from open_clip import tokenizer"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7e85c85c",
"metadata": {},
"outputs": [],
"source": [
"# model, _, preprocess = open_clip.create_model_and_transforms('ViT-g-14', pretrained='laion2b_s34b_b88k')\n",
"# print(model.eval())\n",
"# context_length = model.context_length\n",
"# vocab_size = model.vocab_size\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b1990c21",
"metadata": {},
"outputs": [],
"source": [
"import rdflib\n",
"import csv\n",
"\n",
"# Input RDF dump and the CSV file it will be converted to.\n",
"# NOTE(review): both are absolute, user-specific paths; adjust them before running elsewhere.\n",
"rdf_file = '/Users/ludovicaschaerf/Desktop/Vase data/Getty vases.rdf'\n",
"csv_file = '/Users/ludovicaschaerf/Desktop/Vase data/Getty vases.csv'\n",
"\n",
"# Parse the RDF file into a graph and print its length.\n",
"graph = rdflib.Graph()\n",
"graph.parse(rdf_file)\n",
"print(len(graph))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "92d94cf3",
"metadata": {},
"outputs": [],
"source": [
"print(len(graph))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5c47800b",
"metadata": {},
"outputs": [],
"source": [
"# Dump every triple of the RDF graph to the CSV file as plain strings.\n",
"with open(csv_file, 'w', newline='', encoding='utf-8') as file:\n",
"    writer = csv.writer(file)\n",
"\n",
"    # Header row first, then one row per (subject, predicate, object) triple.\n",
"    writer.writerow(['Subject', 'Predicate', 'Object'])\n",
"    writer.writerows([str(term) for term in triple] for triple in graph)\n",
"\n",
"print('Conversion completed successfully!')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1e7d4b21",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"final_file = pd.read_csv(csv_file)\n",
"final_file"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c337915d",
"metadata": {},
"outputs": [],
"source": [
"vases = pd.read_csv('/Users/ludovicaschaerf/Desktop/Vase data/oxford_vases_info.csv')\n",
"vases_links = vases['URI']\n",
"vases.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bdd909e5",
"metadata": {},
"outputs": [],
"source": [
"vases['Vase Number'].nunique(), vases.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "883531df",
"metadata": {},
"outputs": [],
"source": [
"vases = vases.groupby('Vase Number').first().reset_index()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ee27fdbe",
"metadata": {},
"outputs": [],
"source": [
"from bs4 import BeautifulSoup\n",
"import requests\n",
"# Fetch the first vase page; the timeout keeps an unresponsive server from hanging the cell.\n",
"response = requests.get(vases_links[0], timeout=30)\n",
"html_content = response.text\n",
"soup = BeautifulSoup(html_content, 'html.parser')\n",
"\n",
"# Look for an <img> inside <div class=\"recordImage\"> and report which part is missing otherwise.\n",
"div = soup.find('div', class_='recordImage')\n",
"img = div.find('img') if div is not None else None\n",
"if div is None:\n",
"    print(\"Div not found.\")\n",
"elif img is None:\n",
"    print(\"No image found inside the div.\")\n",
"else:\n",
"    img_src = img['src']\n",
"    print(\"Image source:\", img_src)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2edc40d7",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"# Specify the folder path to save the images\n",
"folder_path = '/Users/ludovicaschaerf/Desktop/Vase data/images'\n",
"\n",
"# Create the folder if it doesn't exist\n",
"os.makedirs(folder_path, exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b7bfcfb3",
"metadata": {},
"outputs": [],
"source": [
"from tqdm import tqdm"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1f06e4e3",
"metadata": {},
"outputs": [],
"source": [
"# Seconds to wait for each HTTP request; without a timeout a stalled server blocks the loop forever.\n",
"request_timeout = 30\n",
"\n",
"# total= lets tqdm show a real progress bar, since zip() has no length of its own.\n",
"for uri, i in tqdm(zip(vases['URI'], vases['Vase Number']), total=len(vases)):\n",
"    # Only vase numbers above 473 are processed.\n",
"    # NOTE(review): presumably a resume point from an earlier partial run - confirm.\n",
"    if i <= 473:\n",
"        continue\n",
"\n",
"    try:\n",
"        response = requests.get(uri, timeout=request_timeout)\n",
"    except requests.RequestException as exc:\n",
"        # A single failed page should not abort the whole crawl.\n",
"        print(\"Request failed.\", i, exc)\n",
"        continue\n",
"\n",
"    soup = BeautifulSoup(response.text, 'html.parser')\n",
"    div = soup.find('div', class_='recordImage')\n",
"    if div is None:\n",
"        print(\"Div not found.\", i)\n",
"        continue\n",
"    img = div.find('img')\n",
"    if img is None:\n",
"        print(\"No image found inside the div.\", i)\n",
"        continue\n",
"\n",
"    # The page's src attribute is appended to the site root to form the download URL.\n",
"    img_src = 'https://www.carc.ox.ac.uk/' + img['src']\n",
"    print(\"Image source:\", img_src)\n",
"\n",
"    # Specify the path to save the image\n",
"    save_path = os.path.join(folder_path, str(i) + '.jpg')\n",
"\n",
"    # Download the image and save it to the specified path\n",
"    try:\n",
"        response = requests.get(img_src, timeout=request_timeout)\n",
"    except requests.RequestException as exc:\n",
"        print(\"Request failed.\", i, exc)\n",
"        continue\n",
"    if response.status_code == 200:\n",
"        with open(save_path, 'wb') as file:\n",
"            file.write(response.content)\n",
"        print(\"Image downloaded successfully.\")\n",
"    else:\n",
"        print(\"Failed to download the image.\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c1420a58",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}