ludusc committed on
Commit 70dfa79
1 Parent(s): 83d8189

view performance, cleaning up

.gitignore CHANGED
@@ -181,4 +181,6 @@ dmypy.json
 .pytype/
 
 # Cython debug symbols
-cython_debug/
+cython_debug/
+
+data/images/
backend/disentangle_concepts.py CHANGED
@@ -7,6 +7,21 @@ from umap import UMAP
 import PIL
 
 def get_separation_space(type_bin, annotations, df, samples=100, method='LR', C=0.1):
+    """
+    The get_separation_space function takes in a type_bin, annotations, and df.
+    It samples the `samples` most representative and the `samples` least representative abstracts for that type_bin (100 each by default),
+    then trains an SVM or logistic regression model on these samples to find a separating direction between them.
+    The function returns this separation vector, the number of important nodes, their indices, and the validation score.
+
+    :param type_bin: Select the type of abstracts to be used for training
+    :param annotations: Access the z_vectors
+    :param df: Get the abstracts that are used for training
+    :param samples: Determine how many samples to take from the top and bottom of the distribution
+    :param method: Specify the classifier to use
+    :param C: Control the regularization strength
+    :return: The weights of the linear classifier, the number of important nodes, their indices, and the rounded validation score
+    :doc-author: Trelent
+    """
     abstracts = np.array([float(ann) for ann in df[type_bin]])
     abstract_idxs = list(np.argsort(abstracts))[:samples]
     repr_idxs = list(np.argsort(abstracts))[-samples:]
@@ -20,17 +35,32 @@ def get_separation_space(type_bin, annotations, df, samples=100, method='LR', C=0.1):
         print('Val performance SVM', svc.score(x_val, y_val))
         imp_features = (np.abs(svc.coef_) > 0.2).sum()
         imp_nodes = np.where(np.abs(svc.coef_) > 0.2)[1]
-        return svc.coef_, imp_features, imp_nodes
+        return svc.coef_, imp_features, imp_nodes, np.round(svc.score(x_val, y_val), 2)
     elif method == 'LR':
         clf = LogisticRegression(random_state=0, C=C)
         clf.fit(x_train, y_train)
         print('Val performance logistic regression', clf.score(x_val, y_val))
         imp_features = (np.abs(clf.coef_) > 0.15).sum()
         imp_nodes = np.where(np.abs(clf.coef_) > 0.15)[1]
-        return clf.coef_ / np.linalg.norm(clf.coef_), imp_features, imp_nodes
+        return clf.coef_ / np.linalg.norm(clf.coef_), imp_features, imp_nodes, np.round(clf.score(x_val, y_val), 2)
 
 
 def regenerate_images(model, z, decision_boundary, min_epsilon=-3, max_epsilon=3, count=5):
+    """
+    The regenerate_images function takes a model, z, and decision_boundary as input. It
+    constructs an inverse rotation/translation matrix and passes it to the generator. The generator
+    expects this matrix as an inverse to avoid potentially failing numerical operations in the network.
+    The function then generates images using G(z_0, label), where z_0 is a linear combination of z and the decision boundary.
+
+    :param model: Pass in the model to be used for image generation
+    :param z: Generate the starting point of the line
+    :param decision_boundary: Generate images along the direction of the decision boundary
+    :param min_epsilon: Set the minimum value of lambda
+    :param max_epsilon: Set the maximum distance from the original image to generate
+    :param count: Determine the number of images that are generated
+    :return: A list of images and a list of lambdas
+    :doc-author: Trelent
+    """
     device = torch.device('cpu')
     G = model.to(device) # type: ignore
 
@@ -62,6 +92,16 @@ def regenerate_images(model, z, decision_boundary, min_epsilon=-3, max_epsilon=3, count=5):
     return images, lambdas
 
 def generate_original_image(z, model):
+    """
+    The generate_original_image function takes in a latent vector and the model,
+    and returns an image generated from that latent vector.
+
+
+    :param z: Generate the image
+    :param model: Generate the image
+    :return: A PIL image
+    :doc-author: Trelent
+    """
     device = torch.device('cpu')
     G = model.to(device) # type: ignore
     # Labels.
@@ -73,11 +113,29 @@ def generate_original_image(z, model):
 
 
 def get_concepts_vectors(concepts, annotations, df, samples=100, method='LR', C=0.1):
+    """
+    The get_concepts_vectors function takes in a list of concepts, a dictionary of annotations, and the dataframe containing all the images.
+    It returns three things:
+    1) A numpy array with shape (len(concepts), 512) where each row is an embedding vector for one concept.
+    2) A set containing all nodes that are important in this separation space.
+    3) A list with the validation performance of the classifier fitted for each concept.
+
+    :param concepts: Specify the concepts to be used in the analysis
+    :param annotations: Get the annotations for each concept
+    :param df: Get the annotations for each concept
+    :param samples: Determine the number of samples to use in training the logistic regression model
+    :param method: Choose the method used to train the model
+    :param C: Control the regularization of the logistic regression
+    :return: The vectors of the concepts, the nodes in common for all concepts, and the per-concept validation performances
+    :doc-author: Trelent
+    """
     important_nodes = []
+    performances = []
     vectors = np.zeros((len(concepts), 512))
     for i, conc in enumerate(concepts):
-        vec, _, imp_nodes = get_separation_space(conc, annotations, df, samples=samples, method=method, C=C)
+        vec, _, imp_nodes, performance = get_separation_space(conc, annotations, df, samples=samples, method=method, C=C)
         vectors[i,:] = vec
+        performances.append(performance)
         important_nodes.append(set(imp_nodes))
 
     # reducer = UMAP(n_neighbors=3, # default 15, The size of local neighborhood (in terms of number of neighboring sample points) used for manifold approximation.
@@ -89,5 +147,5 @@ def get_concepts_vectors(concepts, annotations, df, samples=100, method='LR', C=0.1):
 
     # projection = reducer.fit_transform(vectors)
     nodes_in_common = set.intersection(*important_nodes)
-    return vectors, nodes_in_common
+    return vectors, nodes_in_common, performances
 
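Taken together, the backend change threads a rounded validation score through every return path. Below is a minimal, self-contained sketch, not part of the commit, with purely synthetic data and illustrative names, of what `get_separation_space` computes in its `'LR'` branch and of how `regenerate_images` then walks a latent along the resulting direction:

```python
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Synthetic stand-ins for the annotated 512-d z-vectors (assumption: the real
# data comes from `annotations` / the dataframe, as in the functions above).
rng = np.random.default_rng(0)
X = rng.normal(size=(200, 512))
y = np.array([0] * 100 + [1] * 100)
X[y == 1, :10] += 1.0                    # plant a weak signal in 10 coordinates

x_train, x_val, y_train, y_val = train_test_split(X, y, random_state=0)

clf = LogisticRegression(random_state=0, C=0.1).fit(x_train, y_train)
performance = np.round(clf.score(x_val, y_val), 2)   # the new 4th return value
direction = clf.coef_ / np.linalg.norm(clf.coef_)    # unit separation vector

# Walking a latent along the direction, as regenerate_images does with
# min_epsilon=-3, max_epsilon=3, count=5; each z_0 would be fed to G(z_0, label).
z = rng.normal(size=(1, 512))
latents = [z + eps * direction for eps in np.linspace(-3, 3, 5)]

print('val accuracy:', performance)
```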
pages/1_Disentanglement.py CHANGED
@@ -101,10 +101,10 @@ smoothgrad_col_1, smoothgrad_col_2, smoothgrad_col_3, smoothgrad_col_4, smoothgr
 
 # ---------------------------- DISPLAY COL 1 ROW 1 ------------------------------
 with output_col_1:
-    separation_vector, number_important_features, imp_nodes = get_separation_space(concept_id, annotations, ann_df)
+    separation_vector, number_important_features, imp_nodes, performance = get_separation_space(concept_id, annotations, ann_df)
     # st.write(f'Class ID {input_id} - {input_label}: {pred_prob*100:.3f}% confidence')
     st.write('Concept vector', separation_vector)
-    header_col_1.write(f'Concept {concept_id} - Number of relevant nodes: {number_important_features}')# - Nodes {",".join(list(imp_nodes))}')
+    header_col_1.write(f'Concept {concept_id} - Number of relevant nodes: {number_important_features} - Val classification performance: {performance}')# - Nodes {",".join(list(imp_nodes))}')
 
 # ----------------------------- INPUT column 2 & 3 ----------------------------
 with input_col_2:
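A side note on this call site (a suggestion, not something the commit does): `get_separation_space` refits the classifier on every Streamlit rerun. Assuming the app targets Streamlit 1.18+ (which provides `st.cache_data`), a cached wrapper along these lines would avoid redundant fits; `cached_separation_space` is a hypothetical name:

```python
import streamlit as st

@st.cache_data
def cached_separation_space(concept_id):
    # annotations and ann_df are read from the enclosing module, as in the page;
    # the wrapper is re-run only when concept_id changes.
    return get_separation_space(concept_id, annotations, ann_df)

separation_vector, n_features, imp_nodes, performance = cached_separation_space(concept_id)
```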
pages/2_Concepts_comparison.py CHANGED
@@ -91,10 +91,10 @@ smoothgrad_col_1, smoothgrad_col_2, smoothgrad_col_3, smoothgrad_col_4, smoothgr
 
 # ---------------------------- DISPLAY COL 1 ROW 1 ------------------------------
 with output_col_1:
-    vectors, nodes_in_common = get_concepts_vectors(concept_ids, annotations, ann_df)
+    vectors, nodes_in_common, performances = get_concepts_vectors(concept_ids, annotations, ann_df)
     # st.write(f'Class ID {input_id} - {input_label}: {pred_prob*100:.3f}% confidence')
     #st.write('Concept vector', separation_vector)
-    header_col_1.write(f'Concepts {", ".join(concept_ids)} - Relevant nodes in common: {nodes_in_common}')# - Nodes {",".join(list(imp_nodes))}')
+    header_col_1.write(f'Concepts {", ".join(concept_ids)} - Relevant nodes in common: {nodes_in_common} - Performance of the concept vectors: {performances}')# - Nodes {",".join(list(imp_nodes))}')
 
     edges = []
     for i in range(len(concept_ids)):
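For orientation, the edge values stored in the deleted tmp/nx.html (next section) look like pairwise similarities between the concept vectors computed above. A sketch of how such edges could be built, assuming plain cosine similarity over the rows returned by `get_concepts_vectors` (`cosine_similarity_edges` is a hypothetical helper, not code from this repository):

```python
import numpy as np

def cosine_similarity_edges(concept_ids, vectors):
    # One edge per unordered pair of concepts, weighted by cosine similarity.
    edges = []
    for i in range(len(concept_ids)):
        for j in range(i + 1, len(concept_ids)):
            sim = vectors[i] @ vectors[j] / (
                np.linalg.norm(vectors[i]) * np.linalg.norm(vectors[j]))
            edges.append((concept_ids[i], concept_ids[j], round(float(sim), 3)))
    return edges

# e.g. edges = cosine_similarity_edges(concept_ids, vectors)
```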
tmp/nx.html DELETED
@@ -1,155 +0,0 @@
-<html>
-    <head>
-        <meta charset="utf-8">
-
-        <script src="lib/bindings/utils.js"></script>
-        <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/vis-network/9.1.2/dist/dist/vis-network.min.css" integrity="sha512-WgxfT5LWjfszlPHXRmBWHkV2eceiWTOBvrKCNbdgDYTHrT2AeLCGbF4sZlZw3UMN3WtL0tGUoIAKsu8mllg/XA==" crossorigin="anonymous" referrerpolicy="no-referrer" />
-        <script src="https://cdnjs.cloudflare.com/ajax/libs/vis-network/9.1.2/dist/vis-network.min.js" integrity="sha512-LnvoEWDFrqGHlHmDD2101OrLcbsfkrzoSpvtSQtxK3RMnRV0eOkhhBN2dXHKRrUU8p2DGRTk35n4O8nWSVe1mQ==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
-
-        <center>
-            <h1></h1>
-        </center>
-
-        <!-- <link rel="stylesheet" href="../node_modules/vis/dist/vis.min.css" type="text/css" />
-        <script type="text/javascript" src="../node_modules/vis/dist/vis.js"> </script>-->
-        <link
-            href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.0-beta3/dist/css/bootstrap.min.css"
-            rel="stylesheet"
-            integrity="sha384-eOJMYsd53ii+scO/bJGFsiCZc+5NDVN2yr8+0RDqr0Ql0h+rP48ckxlpbzKgwra6"
-            crossorigin="anonymous"
-        />
-        <script
-            src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.0-beta3/dist/js/bootstrap.bundle.min.js"
-            integrity="sha384-JEW9xMcG8R+pH31jmWH6WWP0WintQrMb4s7ZOdauHnUtxwoG2vI5DkLtS3qm9Ekf"
-            crossorigin="anonymous"
-        ></script>
-
-        <center>
-            <h1></h1>
-        </center>
-        <style type="text/css">
-
-            #mynetwork {
-                width: 100%;
-                height: 750px;
-                background-color: #ffffff;
-                border: 1px solid lightgray;
-                position: relative;
-                float: left;
-            }
-
-        </style>
-    </head>
-
-    <body>
-        <div class="card" style="width: 100%">
-
-            <div id="mynetwork" class="card-body"></div>
-        </div>
-
-        <script type="text/javascript">
-
-            // initialize global variables.
-            var edges;
-            var nodes;
-            var allNodes;
-            var allEdges;
-            var nodeColors;
-            var originalNodes;
-            var network;
-            var container;
-            var options, data;
-            var filter = {
-                item : '',
-                property : '',
-                value : []
-            };
-
-            // This method is responsible for drawing the graph, returns the drawn network
-            function drawGraph() {
-                var container = document.getElementById('mynetwork');
-
-                // parsing and collecting nodes and edges from the python
-                nodes = new vis.DataSet([{"color": "#97c2fc", "id": "Op Art", "label": "Op Art", "shape": "dot", "title": "Op Art"}, {"color": "#97c2fc", "id": "Minimalism", "label": "Minimalism", "shape": "dot", "title": "Minimalism"}, {"color": "#97c2fc", "id": "Surrealism", "label": "Surrealism", "shape": "dot", "title": "Surrealism"}, {"color": "#97c2fc", "id": "Baroque", "label": "Baroque", "shape": "dot", "title": "Baroque"}, {"color": "#97c2fc", "id": "Lithography", "label": "Lithography", "shape": "dot", "title": "Lithography"}, {"color": "#97c2fc", "id": "Woodcut", "label": "Woodcut", "shape": "dot", "title": "Woodcut"}, {"color": "#97c2fc", "id": "etching", "label": "etching", "shape": "dot", "title": "etching"}, {"color": "#97c2fc", "id": "Intaglio", "label": "Intaglio", "shape": "dot", "title": "Intaglio"}]);
-                edges = new vis.DataSet([{"from": "Op Art", "title": "Op Art to Minimalism similarity 0.432", "to": "Minimalism", "value": 0.432}, {"from": "Op Art", "title": "Op Art to Surrealism similarity -0.086", "to": "Surrealism", "value": -0.086}, {"from": "Op Art", "title": "Op Art to Baroque similarity -0.047", "to": "Baroque", "value": -0.047}, {"from": "Op Art", "title": "Op Art to Lithography similarity 0.054", "to": "Lithography", "value": 0.054}, {"from": "Op Art", "title": "Op Art to Woodcut similarity 0.125", "to": "Woodcut", "value": 0.125}, {"from": "Op Art", "title": "Op Art to etching similarity 0.117", "to": "etching", "value": 0.117}, {"from": "Op Art", "title": "Op Art to Intaglio similarity 0.094", "to": "Intaglio", "value": 0.094}, {"from": "Minimalism", "title": "Minimalism to Surrealism similarity -0.042", "to": "Surrealism", "value": -0.042}, {"from": "Minimalism", "title": "Minimalism to Baroque similarity -0.052", "to": "Baroque", "value": -0.052}, {"from": "Minimalism", "title": "Minimalism to Lithography similarity 0.046", "to": "Lithography", "value": 0.046}, {"from": "Minimalism", "title": "Minimalism to Woodcut similarity 0.069", "to": "Woodcut", "value": 0.069}, {"from": "Minimalism", "title": "Minimalism to etching similarity 0.1", "to": "etching", "value": 0.1}, {"from": "Minimalism", "title": "Minimalism to Intaglio similarity 0.03", "to": "Intaglio", "value": 0.03}, {"from": "Surrealism", "title": "Surrealism to Baroque similarity 0.067", "to": "Baroque", "value": 0.067}, {"from": "Surrealism", "title": "Surrealism to Lithography similarity -0.235", "to": "Lithography", "value": -0.235}, {"from": "Surrealism", "title": "Surrealism to Woodcut similarity -0.16", "to": "Woodcut", "value": -0.16}, {"from": "Surrealism", "title": "Surrealism to etching similarity -0.171", "to": "etching", "value": -0.171}, {"from": "Surrealism", "title": "Surrealism to Intaglio similarity -0.076", "to": "Intaglio", "value": -0.076}, {"from": "Baroque", "title": "Baroque to Lithography similarity -0.125", "to": "Lithography", "value": -0.125}, {"from": "Baroque", "title": "Baroque to Woodcut similarity -0.022", "to": "Woodcut", "value": -0.022}, {"from": "Baroque", "title": "Baroque to etching similarity -0.102", "to": "etching", "value": -0.102}, {"from": "Baroque", "title": "Baroque to Intaglio similarity -0.046", "to": "Intaglio", "value": -0.046}, {"from": "Lithography", "title": "Lithography to Woodcut similarity 0.258", "to": "Woodcut", "value": 0.258}, {"from": "Lithography", "title": "Lithography to etching similarity 0.268", "to": "etching", "value": 0.268}, {"from": "Lithography", "title": "Lithography to Intaglio similarity 0.123", "to": "Intaglio", "value": 0.123}, {"from": "Woodcut", "title": "Woodcut to etching similarity 0.21", "to": "etching", "value": 0.21}, {"from": "Woodcut", "title": "Woodcut to Intaglio similarity 0.209", "to": "Intaglio", "value": 0.209}, {"from": "etching", "title": "etching to Intaglio similarity 0.178", "to": "Intaglio", "value": 0.178}]);
-
-                nodeColors = {};
-                allNodes = nodes.get({ returnType: "Object" });
-                for (nodeId in allNodes) {
-                    nodeColors[nodeId] = allNodes[nodeId].color;
-                }
-                allEdges = edges.get({ returnType: "Object" });
-                // adding nodes and edges to the graph
-                data = {nodes: nodes, edges: edges};
-
-                var options = {
-                    "configure": {
-                        "enabled": false
-                    },
-                    "edges": {
-                        "color": {
-                            "inherit": true
-                        },
-                        "smooth": {
-                            "enabled": true,
-                            "type": "dynamic"
-                        }
-                    },
-                    "interaction": {
-                        "dragNodes": true,
-                        "hideEdgesOnDrag": false,
-                        "hideNodesOnDrag": false
-                    },
-                    "physics": {
-                        "enabled": true,
-                        "stabilization": {
-                            "enabled": true,
-                            "fit": true,
-                            "iterations": 1000,
-                            "onlyDynamicEdges": false,
-                            "updateInterval": 50
-                        }
-                    }
-                };
-
-                network = new vis.Network(container, data, options);
-
-                return network;
-            }
-            drawGraph();
-        </script>
-    </body>
-</html>
view_predictions.ipynb CHANGED
The diff for this file is too large to render. See raw diff