ludusc committed
Commit 8a2c29c
Parent: 3f788ef

small fixes, CLIP vecs graph

data/CLIP_vecs.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2971a01a74a391c752fff9ba91c2939ffc6b29165842a87b911e67d9658df53
+size 412234
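
The new LFS-tracked pickle holds the CLIP text embeddings used by the updated concept-comparison page. A minimal loading sketch, assuming the file contains the label-keyed dict of 768-dimensional ViT-L/14 text vectors built in the view_predictions.ipynb cells added in this commit:

import pickle

# Assumption: the pickle maps each concept label to a 768-dim CLIP text embedding,
# as dumped via dic_clip_vecs in view_predictions.ipynb below.
with open('data/CLIP_vecs.pkl', 'rb') as f:
    clip_vecs = pickle.load(f)

print(clip_vecs['Abstract'].shape)  # expected: (768,); 'Abstract' is a label used in the notebook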
pages/1_Disentanglement.py CHANGED
@@ -128,7 +128,7 @@ with input_col_2:
         random_id = st.form_submit_button('Generate a random image')
 
         if random_id:
-            image_id = random.randint(0, 100000)
+            image_id = random.randint(0, 50000)
             st.session_state.image_id = image_id
             chosen_image_id_input.number_input('Image ID:', format='%d', step=1, value=st.session_state.image_id)
 
pages/2_Concepts_comparison.py CHANGED
@@ -25,11 +25,11 @@ st.write('> **What is their join impact on the image?**')
 st.write("""Description to write""")
 
 
-annotations_file = './data/annotated_files/seeds0000-100000.pkl'
+annotations_file = './data/annotated_files/seeds0000-50000.pkl'
 with open(annotations_file, 'rb') as f:
     annotations = pickle.load(f)
 
-ann_df = pd.read_csv('./data/annotated_files/sim_seeds0000-100000.csv')
+ann_df = pd.read_csv('./data/annotated_files/sim_seeds0000-50000.csv')
 concepts = './data/concepts.txt'
 
 with open(concepts) as f:
@@ -57,13 +57,6 @@ with input_col_1:
         # concept_id = chosen_text_id_input.text_input('Concept:', value=st.session_state.concept_id)
         concept_ids = st.multiselect('Concept:', tuple(labels))
 
-        choose_text_button = st.form_submit_button('Choose the defined concepts')
-        # random_text = st.form_submit_button('Select a random concept')
-
-        # if random_text:
-        #     concept_id = random.choice(labels)
-        #     st.session_state.concept_id = concept_id
-        #     chosen_text_id_input.text_input('Concept:', value=st.session_state.concept_id)
         st.write('**Choose a latent space to disentangle**')
         # chosen_text_id_input = st.empty()
         # concept_id = chosen_text_id_input.text_input('Concept:', value=st.session_state.concept_id)
@@ -85,8 +78,8 @@ st.subheader('Concept vector')
 # perform attack container
 # header_col_1, header_col_2, header_col_3, header_col_4, header_col_5 = st.columns([1,1,1,1,1])
 # output_col_1, output_col_2, output_col_3, output_col_4, output_col_5 = st.columns([1,1,1,1,1])
-header_col_1, header_col_2 = st.columns([5,1])
-output_col_1, output_col_2 = st.columns([5,1])
+header_col_1, header_col_2 = st.columns([1,1])
+output_col_1, output_col_2 = st.columns([1,1])
 
 st.subheader('Derivations along the concept vector')
 
@@ -157,6 +150,66 @@ with output_col_1:
     # Load HTML file in HTML component for display on Streamlit page
     components.html(HtmlFile.read(), height=435)
 
+with output_col_2:
+    with open('data/CLIP_vecs.pkl', 'rb') as f:
+        vectors = pickle.load(f)
+
+    # st.write(f'Class ID {input_id} - {input_label}: {pred_prob*100:.3f}% confidence')
+    #st.write('Concept vector', separation_vector)
+    header_col_2.write(f'Concepts {", ".join(concept_ids)} - Latent space CLIP')# - Nodes {",".join(list(imp_nodes))}')
+
+    edges = []
+    for i in range(len(concept_ids)):
+        for j in range(len(concept_ids)):
+            if i != j:
+                print(f'Similarity between {concept_ids[i]} and {concept_ids[j]}')
+                similarity = cosine_similarity(vectors[i,:].reshape(1, -1), vectors[j,:].reshape(1, -1))
+                print(np.round(similarity[0][0], 3))
+                edges.append((concept_ids[i], concept_ids[j], np.round(similarity[0][0], 3)))
+
+    # # Create an empty graph
+    # G = nx.Graph()
+
+    # # Add edges with weights to the graph
+    # for edge in edges:
+    #     node1, node2, weight = edge
+    #     G.add_edge(node1, node2, weight=weight)
+
+    net = Network(height="750px", width="100%",)
+    for e in edges:
+        src = e[0]
+        dst = e[1]
+        w = e[2]
+
+        net.add_node(src, src, title=src)
+        net.add_node(dst, dst, title=dst)
+        net.add_edge(src, dst, value=w, title=src + ' to ' + dst + ' similarity ' + str(w))
+    print(net)
+
+    # Generate network with specific layout settings
+    net.repulsion(
+        node_distance=420,
+        central_gravity=0.33,
+        spring_length=110,
+        spring_strength=0.10,
+        damping=0.95
+    )
+
+    # Save and read graph as HTML file (on Streamlit Sharing)
+    try:
+        path = '/tmp'
+        net.save_graph(f'{path}/pyvis_graph_clip.html')
+        HtmlFile = open(f'{path}/pyvis_graph_clip.html', 'r', encoding='utf-8')
+
+    # Save and read graph as HTML file (locally)
+    except:
+        path = '/html_files'
+        net.save_graph(f'{path}/pyvis_graph_clip.html')
+        HtmlFile = open(f'{path}/pyvis_graph_clip.html', 'r', encoding='utf-8')
+
+    # Load HTML file in HTML component for display on Streamlit page
+    components.html(HtmlFile.read(), height=435)
+
 # ----------------------------- INPUT column 2 & 3 ----------------------------
 # with input_col_2:
 #     with st.form('image_form'):
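
For context, a standalone sketch of the pairwise-similarity step that feeds the new pyvis graph, assuming CLIP_vecs.pkl is the label-keyed dict produced in the notebook below ('Baroque' is only a hypothetical second concept; 'Abstract' appears in the notebook):

import pickle
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

with open('data/CLIP_vecs.pkl', 'rb') as f:
    clip_vecs = pickle.load(f)  # assumed: {label: 768-dim CLIP text embedding}

concept_ids = ['Abstract', 'Baroque']  # 'Baroque' is hypothetical
edges = []
for i, a in enumerate(concept_ids):
    for j, b in enumerate(concept_ids):
        if i != j:
            # cosine similarity between the two concept vectors, rounded as in the page code
            sim = cosine_similarity(clip_vecs[a].reshape(1, -1), clip_vecs[b].reshape(1, -1))
            edges.append((a, b, float(np.round(sim[0][0], 3))))

print(edges)  # (source, target, weight) triples, as consumed by Network.add_edge above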
view_predictions.ipynb CHANGED
@@ -208,6 +208,73 @@
     "images[-1]"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "f5390d8f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/ludovicaschaerf/anaconda3/envs/art-reco_x86/lib/python3.8/site-packages/torch/amp/autocast_mode.py:204: UserWarning: User provided device_type of 'cuda', but CUDA is not available. Disabling\n",
+      " warnings.warn('User provided device_type of \\'cuda\\', but CUDA is not available. Disabling')\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(132, 768)\n"
+     ]
+    }
+   ],
+   "source": [
+    "import open_clip\n",
+    "import os\n",
+    "import random\n",
+    "from tqdm import tqdm\n",
+    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"\"\n",
+    "\n",
+    "model_clip, _, preprocess = open_clip.create_model_and_transforms('ViT-L-14', pretrained='laion2b_s32b_b82k')\n",
+    "tokenizer = open_clip.get_tokenizer('ViT-L-14')\n",
+    "\n",
+    "pre_prompt = \"Artwork, \" #@param {type:\"string\"}\n",
+    "text_descriptions = [f\"{pre_prompt}{label}\" for label in labels]\n",
+    "text_tokens = tokenizer(text_descriptions)\n",
+    "\n",
+    "with torch.no_grad(), torch.cuda.amp.autocast():\n",
+    "    text_features = model_clip.encode_text(text_tokens).float()\n",
+    "    text_features /= text_features.norm(dim=-1, keepdim=True)\n",
+    "    \n",
+    "text_features = text_features.cpu().numpy()\n",
+    "print(text_features.shape)\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "f7858bbf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dic_clip_vecs = {l:v for l,v in zip(labels, text_features)}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "id": "89b4a6fc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dic_clip_vecs['Abstract'].shape\n",
+    "with open('data/CLIP_vecs.pkl', 'wb') as f:\n",
+    "    pickle.dump(dic_clip_vecs, f)"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": 8,