Spaces:

dynamical-inference
/

patchsae-demo

Sleeping

App Files Files Community

hyesulim committed on Dec 23, 2024

Commit

c6fcf0b

verified ·

1 Parent(s): 0025d00

test: add lru cache

Browse files

Files changed (1) hide show

app.py +212 -59

app.py CHANGED Viewed

@@ -4,6 +4,10 @@ import pickle
 from glob import glob
 from time import sleep
 import gradio as gr
 import numpy as np
 import plotly.graph_objects as go
@@ -18,23 +22,160 @@ pkl_root = "./data/out"
 preloaded_data = {}
-def preload_activation(image_name):
-    for model in ["CLIP"] + [f"MaPLE-{ds}" for ds in DATASET_LIST]:
-        image_file = f"{pkl_root}/{model}/{image_name}.pkl.gz"
-        with gzip.open(image_file, "rb") as f:
-            preloaded_data[model] = pickle.load(f)
-def get_activation_distribution(image_name: str, model_type: str):
     activation = get_data(image_name, model_type)[0]
     noisy_features_indices = (
-        (sae_data_dict["mean_acts"]["imagenet"] > 0.1).nonzero()[0].tolist()
     )
     activation[:, noisy_features_indices] = 0
     return activation
 def get_grid_loc(evt, image):
     # Get click coordinates
@@ -203,53 +344,53 @@ def plot_activation_distribution(
     return fig
-def get_segmask(selected_image, slider_value, model_type):
-    image = data_dict[selected_image]["image"]
-    sae_act = get_data(selected_image, model_type)[0]
-    temp = sae_act[:, slider_value]
-    try:
-        mask = torch.Tensor(temp[1:,].reshape(14, 14)).view(1, 1, 14, 14)
-    except Exception as e:
-        print(sae_act.shape, slider_value)
-    mask = torch.nn.functional.interpolate(mask, (image.height, image.width))[0][
-        0
-    ].numpy()
-    mask = (mask - mask.min()) / (mask.max() - mask.min() + 1e-10)
-    base_opacity = 30
-    image_array = np.array(image)[..., :3]
-    rgba_overlay = np.zeros((mask.shape[0], mask.shape[1], 4), dtype=np.uint8)
-    rgba_overlay[..., :3] = image_array[..., :3]
-    darkened_image = (image_array[..., :3] * (base_opacity / 255)).astype(np.uint8)
-    rgba_overlay[mask == 0, :3] = darkened_image[mask == 0]
-    rgba_overlay[..., 3] = 255  # Fully opaque
-    return rgba_overlay
-def get_top_images(slider_value, toggle_btn):
-    def _get_images(dataset_path):
-        top_image_paths = [
-            os.path.join(dataset_path, "imagenet", f"{slider_value}.jpg"),
-            os.path.join(dataset_path, "imagenet-sketch", f"{slider_value}.jpg"),
-            os.path.join(dataset_path, "caltech101", f"{slider_value}.jpg"),
-        ]
-        top_images = [
-            (
-                Image.open(path)
-                if os.path.exists(path)
-                else Image.new("RGB", (256, 256), (255, 255, 255))
-            )
-            for path in top_image_paths
-        ]
-        return top_images
-    if toggle_btn:
-        top_images = _get_images("./data/top_images_masked")
-    else:
-        top_images = _get_images("./data/top_images")
-    return top_images
 def show_activation_heatmap(selected_image, slider_value, model_type, toggle_btn=False):
@@ -464,7 +605,7 @@ def load_all_data(image_root, pkl_root):
     return data_dict, sae_data_dict
-data_dict, sae_data_dict = load_all_data(image_root="./data/image", pkl_root=pkl_root)
 default_image_name = "christmas-imagenet"
@@ -643,4 +784,16 @@ with gr.Blocks(
     # Launch the app
     # demo.queue()
-    demo.launch()

 from glob import glob
 from time import sleep
+from functools import lru_cache
+import concurrent.futures
+from typing import Dict, Tuple, List, Optional
 import gradio as gr
 import numpy as np
 import plotly.graph_objects as go
 preloaded_data = {}
+# Global cache for data: one module-level dict with a sub-dict ("bucket") per kind of cached object; every bucket starts empty
+_CACHE = {
+    'data_dict': {},  # image records keyed by image name; filled by load_all_data
+    'sae_data_dict': {},  # SAE statistics under "mean_acts" / "mean_act_values"; filled by load_all_data
+    'model_data': {},  # bucket for unpickled per-image model payloads
+    'segmasks': {},  # bucket for rendered segmentation overlays
+    'top_images': {}  # bucket for top-activating reference images
+}
+def load_all_data(image_root: str, pkl_root: str) -> Tuple[Dict, Dict]:
+    """Load all data with optimized parallel processing."""
+    # Load images in parallel
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        image_files = glob(f"{image_root}/*")
+        future_to_file = {
+            executor.submit(_load_image_file, image_file): image_file
+            for image_file in image_files
+        }
+        for future in concurrent.futures.as_completed(future_to_file):
+            image_file = future_to_file[future]
+            image_name = os.path.basename(image_file).split(".")[0]
+            result = future.result()
+            if result is not None:
+                _CACHE['data_dict'][image_name] = result
+    # Load SAE data
+    with open("./data/sae_data/mean_acts.pkl", "rb") as f:
+        _CACHE['sae_data_dict']["mean_acts"] = pickle.load(f)
+    # Load mean act values in parallel
+    datasets = ["imagenet", "imagenet-sketch", "caltech101"]
+    _CACHE['sae_data_dict']["mean_act_values"] = {}
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        future_to_dataset = {
+            executor.submit(_load_mean_act_values, dataset): dataset
+            for dataset in datasets
+        }
+        for future in concurrent.futures.as_completed(future_to_dataset):
+            dataset = future_to_dataset[future]
+            result = future.result()
+            if result is not None:
+                _CACHE['sae_data_dict']["mean_act_values"][dataset] = result
+    return _CACHE['data_dict'], _CACHE['sae_data_dict']
+def _load_image_file(image_file: str) -> Dict:
+    """Helper function to load a single image file."""
+    try:
+        image = Image.open(image_file).resize((IMAGE_SIZE, IMAGE_SIZE))
+        return {
+            "image": image,
+            "image_path": image_file,
+        }
+    except Exception as e:
+        print(f"Error loading {image_file}: {e}")
+        return None
+def _load_mean_act_values(dataset: str) -> np.ndarray:
+    """Helper function to load mean act values for a dataset."""
+    try:
+        with gzip.open(f"./data/sae_data/mean_act_values_{dataset}.pkl.gz", "rb") as f:
+            return pickle.load(f)
+    except Exception as e:
+        print(f"Error loading mean act values for {dataset}: {e}")
+        return None
+@lru_cache(maxsize=1024)
+def get_data(image_name: str, model_name: str) -> np.ndarray:
+    """Cached function to get model data."""
+    cache_key = f"{model_name}_{image_name}"
+    if cache_key not in _CACHE['model_data']:
+        data_dir = f"{pkl_root}/{model_name}/{image_name}.pkl.gz"
+        with gzip.open(data_dir, "rb") as f:
+            _CACHE['model_data'][cache_key] = pickle.load(f)
+    return _CACHE['model_data'][cache_key]
+@lru_cache(maxsize=1024)
+def get_activation_distribution(image_name: str, model_type: str) -> np.ndarray:
+    """Cached function to get activation distribution."""
     activation = get_data(image_name, model_type)[0]
     noisy_features_indices = (
+        (_CACHE['sae_data_dict']["mean_acts"]["imagenet"] > 0.1).nonzero()[0].tolist()
     )
     activation[:, noisy_features_indices] = 0
     return activation
+@lru_cache(maxsize=1024)
+def get_segmask(selected_image: str, slider_value: int, model_type: str) -> np.ndarray:
+    """Cached function to get segmentation mask."""
+    cache_key = f"{selected_image}_{slider_value}_{model_type}"
+    if cache_key not in _CACHE['segmasks']:
+        image = _CACHE['data_dict'][selected_image]["image"]
+        sae_act = get_data(selected_image, model_type)[0]
+        temp = sae_act[:, slider_value]
+        mask = torch.Tensor(temp[1:].reshape(14, 14)).view(1, 1, 14, 14)
+        mask = torch.nn.functional.interpolate(mask, (image.height, image.width))[0][0].numpy()
+        mask = (mask - mask.min()) / (mask.max() - mask.min() + 1e-10)
+        base_opacity = 30
+        image_array = np.array(image)[..., :3]
+        rgba_overlay = np.zeros((mask.shape[0], mask.shape[1], 4), dtype=np.uint8)
+        rgba_overlay[..., :3] = image_array[..., :3]
+        darkened_image = (image_array[..., :3] * (base_opacity / 255)).astype(np.uint8)
+        rgba_overlay[mask == 0, :3] = darkened_image[mask == 0]
+        rgba_overlay[..., 3] = 255
+        _CACHE['segmasks'][cache_key] = rgba_overlay
+    return _CACHE['segmasks'][cache_key]
+@lru_cache(maxsize=1024)
+def get_top_images(slider_value: int, toggle_btn: bool) -> List[Image.Image]:
+    """Cached function to get top images."""
+    cache_key = f"{slider_value}_{toggle_btn}"
+    if cache_key not in _CACHE['top_images']:
+        dataset_path = "./data/top_images_masked" if toggle_btn else "./data/top_images"
+        paths = [
+            os.path.join(dataset_path, dataset, f"{slider_value}.jpg")
+            for dataset in ["imagenet", "imagenet-sketch", "caltech101"]
+        ]
+        _CACHE['top_images'][cache_key] = [
+            Image.open(path) if os.path.exists(path) else Image.new("RGB", (256, 256), (255, 255, 255))
+            for path in paths
+        ]
+    return _CACHE['top_images'][cache_key]
+# Initialize data at import time; load_all_data returns the 'data_dict' and 'sae_data_dict' buckets of _CACHE, so these two names refer to those same dicts
+data_dict, sae_data_dict = load_all_data(image_root="./data/image", pkl_root=pkl_root)
+# def preload_activation(image_name):
+#     for model in ["CLIP"] + [f"MaPLE-{ds}" for ds in DATASET_LIST]:
+#         image_file = f"{pkl_root}/{model}/{image_name}.pkl.gz"
+#         with gzip.open(image_file, "rb") as f:
+#             preloaded_data[model] = pickle.load(f)
+# def get_activation_distribution(image_name: str, model_type: str):
+#     activation = get_data(image_name, model_type)[0]
+#     noisy_features_indices = (
+#         (sae_data_dict["mean_acts"]["imagenet"] > 0.1).nonzero()[0].tolist()
+#     )
+#     activation[:, noisy_features_indices] = 0
+#     return activation
 def get_grid_loc(evt, image):
     # Get click coordinates
     return fig
+# def get_segmask(selected_image, slider_value, model_type):
+#     image = data_dict[selected_image]["image"]
+#     sae_act = get_data(selected_image, model_type)[0]
+#     temp = sae_act[:, slider_value]
+#     try:
+#         mask = torch.Tensor(temp[1:,].reshape(14, 14)).view(1, 1, 14, 14)
+#     except Exception as e:
+#         print(sae_act.shape, slider_value)
+#     mask = torch.nn.functional.interpolate(mask, (image.height, image.width))[0][
+#         0
+#     ].numpy()
+#     mask = (mask - mask.min()) / (mask.max() - mask.min() + 1e-10)
+#     base_opacity = 30
+#     image_array = np.array(image)[..., :3]
+#     rgba_overlay = np.zeros((mask.shape[0], mask.shape[1], 4), dtype=np.uint8)
+#     rgba_overlay[..., :3] = image_array[..., :3]
+#     darkened_image = (image_array[..., :3] * (base_opacity / 255)).astype(np.uint8)
+#     rgba_overlay[mask == 0, :3] = darkened_image[mask == 0]
+#     rgba_overlay[..., 3] = 255  # Fully opaque
+#     return rgba_overlay
+# def get_top_images(slider_value, toggle_btn):
+#     def _get_images(dataset_path):
+#         top_image_paths = [
+#             os.path.join(dataset_path, "imagenet", f"{slider_value}.jpg"),
+#             os.path.join(dataset_path, "imagenet-sketch", f"{slider_value}.jpg"),
+#             os.path.join(dataset_path, "caltech101", f"{slider_value}.jpg"),
+#         ]
+#         top_images = [
+#             (
+#                 Image.open(path)
+#                 if os.path.exists(path)
+#                 else Image.new("RGB", (256, 256), (255, 255, 255))
+#             )
+#             for path in top_image_paths
+#         ]
+#         return top_images
+#     if toggle_btn:
+#         top_images = _get_images("./data/top_images_masked")
+#     else:
+#         top_images = _get_images("./data/top_images")
+#     return top_images
 def show_activation_heatmap(selected_image, slider_value, model_type, toggle_btn=False):
     return data_dict, sae_data_dict
+# data_dict, sae_data_dict = load_all_data(image_root="./data/image", pkl_root=pkl_root)
 default_image_name = "christmas-imagenet"
     # Launch the app
     # demo.queue()
+    # demo.launch()
+if __name__ == "__main__":  # start the server only when run as a script, not on import
+    demo.queue()  # Enable queuing for better handling of concurrent users
+    demo.launch(
+        server_name="0.0.0.0",  # Allow external access
+        server_port=7860,  # NOTE(review): presumably the port the hosting platform expects — confirm
+        share=False,  # Set to True if you want to create a public URL
+        show_error=True,  # NOTE(review): presumably surfaces handler errors in the UI — confirm against the Gradio docs
+        # Optimize concurrency
+        max_threads=8,  # Adjust based on your CPU cores
+    )