{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fe7acfaf-dc61-4211-9c78-8e4433bc9deb",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pickle\n",
    "import random\n",
    "from urllib.parse import urljoin\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import PIL.Image\n",
    "import torch\n",
    "#import ninja\n",
    "from backend.disentangle_concepts import *\n",
    "import dnnlib \n",
    "import legacy\n",
    "\n",
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "# Pickled annotations; later cells index its 'fname', 'z_vectors' and 'w_vectors' entries.\n",
    "# NOTE: pickle.load can execute arbitrary code, so only open files from a trusted source.\n",
    "annotations_file = './data/annotated_files/seeds0000-50000.pkl'\n",
    "with open(annotations_file, 'rb') as annotations_handle:\n",
    "    annotations = pickle.load(annotations_handle)\n",
    "\n",
    "# Annotation table; later cells select its columns by concept name.\n",
    "ann_df = pd.read_csv('./data/annotated_files/sim_seeds0000-50000.csv')\n",
    "\n",
    "# One label per line of the concepts file, with surrounding whitespace stripped.\n",
    "concepts = './data/concepts.txt'\n",
    "with open(concepts) as concepts_handle:\n",
    "    labels = [raw_line.strip() for raw_line in concepts_handle]\n",
    "\n",
    "# The 'G_ema' entry of the network snapshot, moved to the CPU.\n",
    "with dnnlib.util.open_url('./data/model_files/network-snapshot-010600.pkl') as snapshot_handle:\n",
    "    model = legacy.load_network_pkl(snapshot_handle)['G_ema'].to('cpu') # type: ignore\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ecba0e89",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fit a separation direction for one concept in W space with the 'LR' method.\n",
    "concept_id = 'Abstract'\n",
    "separation_vector, number_important_features, important_nodes, accuracy_sep = get_separation_space(\n",
    "    concept_id,\n",
    "    annotations,\n",
    "    ann_df,\n",
    "    method='LR',\n",
    "    samples=150,\n",
    "    C=0.1,\n",
    "    latent_space='W',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f7475624",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the shape of the separation vector returned by get_separation_space.\n",
    "separation_vector.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bf4b31c0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# get_verification_score(concept_id, separation_vector, model, annotations)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ac437ea3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show the feature count and node list returned alongside the separation vector.\n",
    "print(number_important_features, important_nodes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c6a63345",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pick a random index (randint is inclusive on both ends) and render its stored W vector.\n",
    "# NOTE(review): no RNG seed is set, so the chosen image differs between runs; this also assumes\n",
    "# annotations['w_vectors'] has at least 10001 entries - TODO confirm.\n",
    "seed = random.randint(0,10000)\n",
    "original_image_vec = annotations['w_vectors'][seed]\n",
    "img = generate_original_image(original_image_vec, model, latent_space='W')\n",
    "img"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "09f13e6a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Call regenerate_images in W space with epsilon bounds of -10 and 10.\n",
    "images, lambdas = regenerate_images(\n",
    "    model,\n",
    "    original_image_vec,\n",
    "    separation_vector,\n",
    "    min_epsilon=-10,\n",
    "    max_epsilon=10,\n",
    "    latent_space='W',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1cf9f95d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# First entry of the list returned by regenerate_images.\n",
    "images[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "48834be4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Last entry of the list returned by regenerate_images.\n",
    "images[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f5390d8f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import open_clip\n",
    "import os\n",
    "import random\n",
    "from tqdm import tqdm\n",
    "# Intended to hide every CUDA device so the CLIP model runs on the CPU.\n",
    "# NOTE(review): this only takes effect if CUDA has not been initialised yet in this process.\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"\"\n",
    "\n",
    "model_clip, _, preprocess = open_clip.create_model_and_transforms('ViT-L-14', pretrained='laion2b_s32b_b82k')\n",
    "tokenizer = open_clip.get_tokenizer('ViT-L-14')\n",
    "\n",
    "# One prompt per concept label: the shared prefix followed by the label text.\n",
    "pre_prompt = \"Artwork, \" #@param {type:\"string\"}\n",
    "text_descriptions = [f\"{pre_prompt}{label}\" for label in labels]\n",
    "text_tokens = tokenizer(text_descriptions)\n",
    "\n",
    "# Inference only. No CUDA autocast context here: the model and tokens live on the CPU and\n",
    "# CUDA is hidden above, so a CUDA autocast would do nothing except emit a warning.\n",
    "with torch.no_grad():\n",
    "    text_features = model_clip.encode_text(text_tokens).float()\n",
    "    # Scale every embedding to unit L2 norm along the last dimension.\n",
    "    text_features /= text_features.norm(dim=-1, keepdim=True)\n",
    "\n",
    "text_features = text_features.cpu().numpy()\n",
    "print(text_features.shape)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0eae840f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): this cell repeats the imports and the CUDA_VISIBLE_DEVICES setting of the\n",
    "# preceding cell; it looks like an alternative entry point for the vase section below.\n",
    "import open_clip\n",
    "import os\n",
    "import random\n",
    "from tqdm import tqdm\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4d776015",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import torch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e3f917a7",
   "metadata": {},
   "outputs": [],
   "source": [
    "model_clip, _, preprocess = open_clip.create_model_and_transforms('ViT-L-14', pretrained='laion2b_s32b_b82k')\n",
    "tokenizer = open_clip.get_tokenizer('ViT-L-14')\n",
    "\n",
    "# Build one text label per annotation column of each vase attribute table.\n",
    "# NOTE: `labels` is rebound here (it held the art concepts before); the cells right below\n",
    "# rely on that. Each table is read into its own name so that the art `ann_df` loaded in the\n",
    "# first cell is not overwritten - later cells still index `ann_df` by art concept.\n",
    "labels = []\n",
    "for type_col in ['Provenance', 'Shape Name', 'Fabric', 'Technique']:\n",
    "    vase_ann_df = pd.read_csv(f'./data/vase_annotated_files/sim_{type_col}_seeds0000-20000.csv').drop(['Unnamed: 0', 'ID'], axis=1)\n",
    "    # Prefix every column name with its attribute type, e.g. '<type_col> <column>'.\n",
    "    labels += [type_col + ' ' + lbl for lbl in vase_ann_df.columns]\n",
    "\n",
    "print(labels)\n",
    "pre_prompt = \"Vase art, \" #@param {type:\"string\"}\n",
    "text_descriptions = [f\"{pre_prompt}{label}\" for label in labels]\n",
    "text_tokens = tokenizer(text_descriptions)\n",
    "\n",
    "# Inference only. No CUDA autocast context: the model and tokens are on the CPU, so a CUDA\n",
    "# autocast would not change the computation.\n",
    "with torch.no_grad():\n",
    "    text_features = model_clip.encode_text(text_tokens).float()\n",
    "    # Scale every embedding to unit L2 norm along the last dimension.\n",
    "    text_features /= text_features.norm(dim=-1, keepdim=True)\n",
    "\n",
    "text_features = text_features.cpu().numpy()\n",
    "print(text_features.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f7858bbf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Map each label to the row of text_features at the same position.\n",
    "dic_clip_vecs = dict(zip(labels, text_features))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "de6bd428",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d19c8e4c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The lookup doubles as a sanity check: it raises KeyError if the vase labels are not in the\n",
    "# dict. The .shape value itself is not displayed because it is not the cell's last expression.\n",
    "dic_clip_vecs['Provenance ADRIA'].shape\n",
    "# Persist the label -> CLIP text vector mapping.\n",
    "with open('data/CLIP_vecs_vases.pkl', 'wb') as f:\n",
    "    pickle.dump(dic_clip_vecs, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "89b4a6fc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): this lookup needs `dic_clip_vecs` built from the art concept labels. When the\n",
    "# notebook runs top to bottom, the dict was last built from the prefixed vase labels, so the\n",
    "# 'Abstract' key is presumably missing at this point - confirm the intended cell order.\n",
    "dic_clip_vecs['Abstract'].shape\n",
    "# Persist the label -> CLIP text vector mapping.\n",
    "with open('data/CLIP_vecs.pkl', 'wb') as f:\n",
    "    pickle.dump(dic_clip_vecs, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3ac84c8c-e916-4719-a7cd-610cea768d7c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Re-read the annotations pickle, then preview the first ten file names and the first z vector.\n",
    "with open(annotations_file, 'rb') as annotations_handle:\n",
    "    annotations = pickle.load(annotations_handle)\n",
    "\n",
    "(annotations['fname'][:10], annotations['z_vectors'][:1])  # annotations['annotations'][:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5bcfdb5c-0ab9-462f-98ce-78bddbb96710",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pick a random index (randint is inclusive on both ends) and render its stored z vector.\n",
    "# NOTE(review): no RNG seed is set, so the chosen image differs between runs.\n",
    "seed = random.randint(0,10000)\n",
    "original_image_vec = annotations['z_vectors'][seed]\n",
    "img = generate_original_image(original_image_vec, model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f8916281",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Concept column of ann_df inspected by the cells below.\n",
    "concept_id = \"Readymade\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "55b47c11-9f42-4ff0-9098-f32b1e826ccb",
   "metadata": {},
   "outputs": [],
   "source": [
    "samples = 500 \n",
    "# Annotation values of the chosen concept as floats, one per row of ann_df.\n",
    "abstracts = np.array([float(value) for value in ann_df[concept_id]])\n",
    "\n",
    "# Rank rows by ascending value once; take the lowest and the highest `samples` positions.\n",
    "ranked_idxs = list(np.argsort(abstracts))\n",
    "repr_idxs = ranked_idxs[:samples]\n",
    "abstract_idxs = ranked_idxs[-samples:]\n",
    "\n",
    "# Render the first entry of the high-value slice.\n",
    "z = annotations['z_vectors'][abstract_idxs[0]]\n",
    "img = generate_original_image(z, model)\n",
    "\n",
    "# Drop the 'ID' column (if present) before listing that row's 20 largest values.\n",
    "if 'ID' in ann_df.columns:\n",
    "    ann_df = ann_df.drop(columns='ID')\n",
    "row_sorted = ann_df.loc[abstract_idxs[0], :].sort_values()\n",
    "print(row_sorted.iloc[-20:])\n",
    "img"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fb7ca184",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Frequency of each distinct value in the 'Abstract' column.\n",
    "ann_df['Abstract'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a1a4d9e3-656c-41c1-a433-1c1a83fa33de",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Render the first entry of the low-value slice and print that row's values in ascending order.\n",
    "z = annotations['z_vectors'][repr_idxs[0]]\n",
    "img = generate_original_image(z, model)\n",
    "print(ann_df.loc[repr_idxs[0], :].sort_values())\n",
    "img"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "65218a37-76e6-4df6-929e-fc672ff5f37b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print every column name that contains the character '1'.\n",
    "columns_with_one = [name for name in ann_df.columns if '1' in name]\n",
    "for name in columns_with_one:\n",
    "    print(name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "72fa8929-40a0-4376-97d2-5d6ac4a4eda6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Concepts to compare. This rebinds `concepts`, which previously held the concepts file path.\n",
    "concepts = [\n",
    "    'Op Art', 'Minimalism', 'Surrealism', 'Baroque',\n",
    "    'Lithography', 'Woodcut', 'Etching', 'Intaglio',\n",
    "]\n",
    "\n",
    "vectors, nodes_in_common = get_concepts_vectors(\n",
    "    concepts, annotations, ann_df, method='LR', samples=150, C=0.1\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "43c5fdd8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Second value returned by get_concepts_vectors.\n",
    "print(nodes_in_common)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0dbf83d5-e33e-4401-8541-1dbd0aa4b10a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# import plotly.graph_objects as go\n",
    "\n",
    "# # Generate example data\n",
    "# x = projection[:, 0]  # x-coordinates of vectors\n",
    "# y = projection[:, 1]  # y-coordinates of vectors\n",
    "# z = projection[:, 2]  # z-coordinates of vectors\n",
    "# data = []\n",
    "\n",
    "# for i in range(len(x)):\n",
    "#     # Create a 3D scatter plot\n",
    "#     data.append(go.Scatter3d(\n",
    "#         x=[0, x[i]],  # Start at x = 0\n",
    "#         y=[0, y[i]],  # Start at y = 0\n",
    "#         z=[0, z[i]],  # Start at z = 0\n",
    "#         mode='lines',\n",
    "#         marker=dict(size=5),\n",
    "#         line=dict(width=2),\n",
    "#         hovertext=concepts[i],\n",
    "#         text=concepts[i],\n",
    "#         name=concepts[i],\n",
    "#         showlegend=True,\n",
    "#     ))\n",
    "\n",
    "# # layout = go.Layout(margin = dict( l = 0,\n",
    "# #                                   r = 0,\n",
    "# #                                   b = 0,\n",
    "# #                                   t = 0)\n",
    "# #                   )\n",
    "# fig = go.Figure(data=data)\n",
    "# # plot(fig,filename=\"vector.html\",auto_open=False,image='png',image_height=800,image_width=1500)\n",
    "# # # Set the title\n",
    "# fig.update_layout(title='3D Vectors')\n",
    "# # Set the axis labels\n",
    "# fig.update_layout(scene=dict(\n",
    "#     xaxis=dict(title='X', range=[-10, 10],),\n",
    "#     yaxis=dict(title='Y', range=[-10, 10],),\n",
    "#     zaxis=dict(title='Z', range=[-10, 10],),\n",
    "# ))\n",
    "\n",
    "# # Show the plot\n",
    "# fig.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "30a395c4-9d42-4790-af08-62b474eb1df3",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics.pairwise import cosine_similarity\n",
    "\n",
    "# Collect (concept_a, concept_b, rounded cosine similarity) for every ordered pair of distinct\n",
    "# concepts; each unordered pair therefore appears twice, once per direction.\n",
    "edges = []\n",
    "for i, concept_a in enumerate(concepts):\n",
    "    for j, concept_b in enumerate(concepts):\n",
    "        if i == j:\n",
    "            continue\n",
    "        print(f'Similarity between {concept_a} and {concept_b}')\n",
    "        # Each row is reshaped to (1, n) because cosine_similarity is called on 2-D inputs.\n",
    "        similarity = cosine_similarity(vectors[i,:].reshape(1, -1), vectors[j,:].reshape(1, -1))\n",
    "        rounded_similarity = np.round(similarity[0][0], 3)\n",
    "        print(rounded_similarity)\n",
    "        edges.append((concept_a, concept_b, rounded_similarity))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "16e81bd8",
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import networkx as nx"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "01d31c45",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create an empty graph\n",
    "G = nx.Graph()\n",
    "\n",
    "# Add edges with weights to the graph\n",
    "for edge in edges:\n",
    "    node1, node2, weight = edge\n",
    "    G.add_edge(node1, node2, weight=weight)\n",
    "\n",
    "# Draw the graph\n",
    "pos = nx.spring_layout(G)  # Position nodes using the spring layout algorithm\n",
    "nx.draw_networkx(G, pos=pos, with_labels=True, node_color='lightblue', node_size=500, font_size=12)\n",
    "\n",
    "# Draw edge labels with weights\n",
    "edge_labels = nx.get_edge_attributes(G, 'weight')\n",
    "nx.draw_networkx_edge_labels(G, pos=pos, edge_labels=edge_labels)\n",
    "\n",
    "# Display the graph interactively\n",
    "plt.axis('off')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d8fd3469",
   "metadata": {},
   "outputs": [],
   "source": [
    "import nxviz as nv\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6f0925e9",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyvis.network import Network"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f89bd374",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Interactive pyvis network: one node per concept, one edge per entry of `edges`.\n",
    "net = Network(notebook=True, height=\"750px\", width=\"100%\",)\n",
    "for src, dst, w in edges:\n",
    "    # The concept name serves as node id, label and hover title.\n",
    "    net.add_node(src, src, title=src)\n",
    "    net.add_node(dst, dst, title=dst)\n",
    "    net.add_edge(src, dst, value=w, title=src + ' to ' + dst + ' similarity ' +str(w))\n",
    "print(net)\n",
    "\n",
    "# neighbor_map = net.get_adj_list()\n",
    "\n",
    "# # add neighbor data to node hover data\n",
    "# for node in net.nodes:\n",
    "#     node[\"title\"] += \" Neighbors:<br>\" + \"<br>\".join(neighbor_map[node[\"id\"]])\n",
    "#     node[\"value\"] = len(neighbor_map[node[\"id\"]])\n",
    "\n",
    "\n",
    "# net.toggle_physics(True)\n",
    "# Write the network to an HTML file and show it.\n",
    "net.show('nx.html')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fce19b9e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# seed = random.randint(0,10000)\n",
    "# original_image_vec = annotations['z_vectors'][seed]\n",
    "# img = generate_original_image(original_image_vec, model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "184687a6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# z_0 = original_image_vec + 2 * vectors[0] + 2 * vectors[1]\n",
    "        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "52bc2a0d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# img = model(z_0.to(), None, truncation_psi=0.7, noise_mode='const')\n",
    "# img = (img.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8)\n",
    "        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5afdc77f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reuse the generator that the first cell already loaded from this same snapshot\n",
    "# ('./data/model_files/network-snapshot-010600.pkl', 'G_ema', on the CPU) instead of\n",
    "# deserialising the checkpoint a second time. `G` is the name the cells below expect.\n",
    "# This also rebinds `G`, which an earlier cell used for the networkx graph.\n",
    "G = model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ad9865af",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print the generator object.\n",
    "print(G)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3c771ba9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print the generator's mapping sub-network.\n",
    "print(G.mapping)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eec0534d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the mapping network on `z` with truncation_psi=0.7.\n",
    "# NOTE(review): `z` is whatever an earlier cell left bound - confirm which vector is meant.\n",
    "W = G.mapping(z, None, truncation_psi=0.7)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "97057ae3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Shape of the mapping network output for `z` with truncation_psi=1.\n",
    "print(G.mapping(z, None, truncation_psi=1).shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "15cbbc5a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Shape of the synthesis network output for `W` with noise_mode='const'.\n",
    "print(G.synthesis(W, noise_mode='const').shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f02410a2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Draw a seeded Gaussian sample of width G.z_dim and pass it straight to the synthesis\n",
    "# network, i.e. without running it through G.mapping first.\n",
    "seed = random.randint(0, 1000000)\n",
    "gaussian_sample = np.random.RandomState(seed).randn(1, G.z_dim)\n",
    "W_0 = torch.from_numpy(gaussian_sample).to('cpu')\n",
    "# Tile the single vector G.num_ws times.\n",
    "W = W_0.repeat([1, G.num_ws, 1])\n",
    "img = G.synthesis(W, noise_mode='random')\n",
    "# Reorder the dimensions to (0, 2, 3, 1), then rescale and clamp into uint8 [0, 255].\n",
    "img = img.permute(0, 2, 3, 1)\n",
    "img = (img * 127.5 + 128).clamp(0, 255).to(torch.uint8)\n",
    "im = PIL.Image.fromarray(img[0].cpu().numpy(), 'RGB')\n",
    "im.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "046ffb56",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Same seed as the previous cell, but this time the sample goes through the mapping network.\n",
    "gaussian_sample = np.random.RandomState(seed).randn(1, G.z_dim)\n",
    "z = torch.from_numpy(gaussian_sample).to('cpu')\n",
    "W = G.mapping(z, None, truncation_psi=1)\n",
    "# Element-wise comparison of entries 0 and 1 along dim 1 of the mapped output.\n",
    "print(W[0, 0] == W[0, 1])\n",
    "img = G.synthesis(W, noise_mode='random')\n",
    "# Reorder the dimensions to (0, 2, 3, 1), then rescale and clamp into uint8 [0, 255].\n",
    "img = img.permute(0, 2, 3, 1)\n",
    "img = (img * 127.5 + 128).clamp(0, 255).to(torch.uint8)\n",
    "im = PIL.Image.fromarray(img[0].cpu().numpy(), 'RGB')\n",
    "im.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9ea5676d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Shape of W after taking index 0 along dim 1.\n",
    "W[:,0,:,].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a0687e87",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Shape of the current z tensor.\n",
    "z.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a01b9c7f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate one image per seed and record its z vector, its W vector and the output path.\n",
    "# The PNG save below fails if the folder is missing, so create it up front.\n",
    "os.makedirs('data/images', exist_ok=True)\n",
    "\n",
    "z_vals = []\n",
    "w_vals = []\n",
    "fnames = []\n",
    "# Generate images.\n",
    "seeds = range(100)\n",
    "for seed_idx, seed in enumerate(seeds):\n",
    "    print('Generating image for seed %d (%d/%d) ...' % (seed, seed_idx, len(seeds)))\n",
    "    # Build the output path once; it is both recorded in `fnames` and used for saving.\n",
    "    image_path = f'data/images/seed{seed:04d}.png'\n",
    "    # A RandomState seeded with the seed itself makes each z reproducible.\n",
    "    z = torch.from_numpy(np.random.RandomState(seed).randn(1, G.z_dim))\n",
    "    z_vals.append(z)\n",
    "    z = z.to('cpu')\n",
    "    fnames.append(image_path)\n",
    "    # Construct an inverse rotation/translation matrix and pass to the generator.  The\n",
    "    # generator expects this matrix as an inverse to avoid potentially failing numerical\n",
    "    # operations in the network.\n",
    "    # if hasattr(G.synthesis, 'input'):\n",
    "    #     m = make_transform(translate, rotate)\n",
    "    #     m = np.linalg.inv(m)\n",
    "    #     G.synthesis.input.transform.copy_(torch.from_numpy(m))\n",
    "    # Inference only, so skip autograd bookkeeping.\n",
    "    with torch.no_grad():\n",
    "        W = G.mapping(z, None, truncation_psi=1)\n",
    "        img = G.synthesis(W, noise_mode='const')\n",
    "    # Keep only index 0 along dim 1 of W.\n",
    "    w_vals.append(W[:,0,:])\n",
    "\n",
    "    #img = G(z, None, truncation_psi=0.7, noise_mode='const')\n",
    "    # Reorder the dimensions to (0, 2, 3, 1), then rescale and clamp into uint8 [0, 255].\n",
    "    img = (img.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8)\n",
    "    PIL.Image.fromarray(img[0].cpu().numpy(), 'RGB').save(image_path)\n",
    "\n",
    "# Store file names and latent vectors together, named after the first and last seed.\n",
    "info = {'fname': fnames, 'z_vectors': z_vals, 'w_vectors': w_vals}\n",
    "with open(f'data/images/seeds{seeds[0]:04d}-{seeds[-1]:04d}.pkl', 'wb') as f:\n",
    "    pickle.dump(info, f)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2e44701e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# import open_clip\n",
    "# from open_clip import tokenizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7e85c85c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# model, _, preprocess = open_clip.create_model_and_transforms('ViT-g-14', pretrained='laion2b_s34b_b88k')\n",
    "# print(model.eval())\n",
    "# context_length = model.context_length\n",
    "# vocab_size = model.vocab_size\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b1990c21",
   "metadata": {},
   "outputs": [],
   "source": [
    "import rdflib\n",
    "import csv\n",
    "\n",
    "# Folder holding the vase data. It defaults to the original location; set the VASE_DATA_DIR\n",
    "# environment variable to run the notebook on another machine.\n",
    "VASE_DATA_DIR = os.environ.get('VASE_DATA_DIR', '/Users/ludovicaschaerf/Desktop/Vase data')\n",
    "\n",
    "# Load the RDF file\n",
    "rdf_file = os.path.join(VASE_DATA_DIR, 'Getty vases.rdf')\n",
    "graph = rdflib.Graph()\n",
    "graph.parse(rdf_file)\n",
    "\n",
    "# Specify the desired CSV file path\n",
    "csv_file = os.path.join(VASE_DATA_DIR, 'Getty vases.csv')\n",
    "# Size of the parsed graph\n",
    "print(len(graph))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "92d94cf3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Size of the parsed graph (same value the previous cell printed).\n",
    "print(len(graph))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5c47800b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Open the CSV file in write mode\n",
    "with open(csv_file, 'w', newline='', encoding='utf-8') as file:\n",
    "\n",
    "    # Create a CSV writer object\n",
    "    writer = csv.writer(file)\n",
    "\n",
    "    # Write the header row\n",
    "    writer.writerow(['Subject', 'Predicate', 'Object'])\n",
    "\n",
    "    # Iterate over each triple in the RDF graph\n",
    "    for subject, predicate, obj in graph:\n",
    "\n",
    "        # Convert the triple components to strings\n",
    "        subject_str = str(subject)\n",
    "        predicate_str = str(predicate)\n",
    "        object_str = str(obj)\n",
    "\n",
    "        # Write the triple as a row in the CSV file\n",
    "        writer.writerow([subject_str, predicate_str, object_str])\n",
    "\n",
    "print('Conversion completed successfully!')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1e7d4b21",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "# Read the exported CSV back in and display it.\n",
    "final_file = pd.read_csv(csv_file)\n",
    "final_file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c337915d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Folder holding the vase data. It defaults to the original location; set the VASE_DATA_DIR\n",
    "# environment variable to run the notebook on another machine. Defined in this cell so that\n",
    "# the cell can run on its own.\n",
    "VASE_DATA_DIR = os.environ.get('VASE_DATA_DIR', '/Users/ludovicaschaerf/Desktop/Vase data')\n",
    "\n",
    "vases = pd.read_csv(os.path.join(VASE_DATA_DIR, 'oxford_vases_info.csv'))\n",
    "# Record page links, taken before any later de-duplication of `vases`.\n",
    "vases_links = vases['URI']\n",
    "vases.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bdd909e5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compare the number of distinct vase numbers with the table's (rows, columns) shape.\n",
    "vases['Vase Number'].nunique(), vases.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "883531df",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collapse the table to one row per 'Vase Number'.\n",
    "# NOTE(review): GroupBy.first() takes the first non-null value of each column, so a resulting\n",
    "# row may combine values from several source rows - confirm this is intended.\n",
    "vases = vases.groupby('Vase Number').first().reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ee27fdbe",
   "metadata": {},
   "outputs": [],
   "source": [
    "from bs4 import BeautifulSoup\n",
    "import requests\n",
    "\n",
    "# Fetch the first record page and look for the <img> inside <div class='recordImage'>.\n",
    "# A timeout is passed because requests otherwise waits indefinitely for a response.\n",
    "response = requests.get(vases_links[0], timeout=30)\n",
    "html_content = response.text\n",
    "soup = BeautifulSoup(html_content, 'html.parser')\n",
    "div = soup.find('div', class_='recordImage')\n",
    "if div is not None:\n",
    "    img = div.find('img')\n",
    "    if img is not None:\n",
    "        # The src attribute is printed as found on the page.\n",
    "        img_src = img['src']\n",
    "        print(\"Image source:\", img_src)\n",
    "    else:\n",
    "        print(\"No image found inside the div.\")\n",
    "else:\n",
    "    print(\"Div not found.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2edc40d7",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "# Folder holding the vase data. It defaults to the original location; set the VASE_DATA_DIR\n",
    "# environment variable to run the notebook on another machine. Defined in this cell so that\n",
    "# the cell can run on its own.\n",
    "VASE_DATA_DIR = os.environ.get('VASE_DATA_DIR', '/Users/ludovicaschaerf/Desktop/Vase data')\n",
    "\n",
    "# Specify the folder path to save the images\n",
    "folder_path = os.path.join(VASE_DATA_DIR, 'images')\n",
    "\n",
    "# Create the folder if it doesn't exist\n",
    "os.makedirs(folder_path, exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b7bfcfb3",
   "metadata": {},
   "outputs": [],
   "source": [
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1f06e4e3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Download the record image of every vase whose number is above 473.\n",
    "# NOTE(review): 473 looks like a resume point from an interrupted run - confirm before reuse.\n",
    "REQUEST_TIMEOUT = 30  # seconds; requests waits indefinitely when no timeout is given\n",
    "IMAGE_BASE_URL = 'https://www.carc.ox.ac.uk/'\n",
    "\n",
    "# total= lets tqdm show progress against the table length (zip has no length of its own).\n",
    "for uri, i in tqdm(zip(vases['URI'], vases['Vase Number']), total=len(vases)):\n",
    "    if not i > 473:\n",
    "        continue\n",
    "\n",
    "    # A single unreachable page is reported and skipped rather than aborting the whole run.\n",
    "    try:\n",
    "        response = requests.get(uri, timeout=REQUEST_TIMEOUT)\n",
    "    except requests.RequestException as err:\n",
    "        print(\"Failed to fetch the record page.\", i, err)\n",
    "        continue\n",
    "\n",
    "    soup = BeautifulSoup(response.text, 'html.parser')\n",
    "    div = soup.find('div', class_='recordImage')\n",
    "    if div is None:\n",
    "        print(\"Div not found.\", i)\n",
    "        continue\n",
    "    img = div.find('img')\n",
    "    if img is None:\n",
    "        print(\"No image found inside the div.\", i)\n",
    "        continue\n",
    "\n",
    "    # urljoin resolves the src against the site root without doubling the slash.\n",
    "    img_src = urljoin(IMAGE_BASE_URL, img['src'])\n",
    "    print(\"Image source:\", img_src)\n",
    "\n",
    "    # Specify the path to save the image\n",
    "    save_path = os.path.join(folder_path, str(i) + '.jpg')\n",
    "\n",
    "    # Download the image and save it to the specified path\n",
    "    try:\n",
    "        response = requests.get(img_src, timeout=REQUEST_TIMEOUT)\n",
    "    except requests.RequestException as err:\n",
    "        print(\"Failed to download the image.\", i, err)\n",
    "        continue\n",
    "    if response.status_code == 200:\n",
    "        with open(save_path, 'wb') as file:\n",
    "            file.write(response.content)\n",
    "        print(\"Image downloaded successfully.\")\n",
    "    else:\n",
    "        print(\"Failed to download the image.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c1420a58",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}