Upload model

Browse files

Files changed (9) hide show

cats.py +151 -0
config.json +4 -0
model-00001-of-00006.safetensors +1 -1
model-00002-of-00006.safetensors +1 -1
model-00003-of-00006.safetensors +1 -1
model-00004-of-00006.safetensors +1 -1
model-00005-of-00006.safetensors +1 -1
model-00006-of-00006.safetensors +2 -2
model.safetensors.index.json +2 -1

cats.py ADDED Viewed

	@@ -0,0 +1,151 @@

+import importlib
+import json
+import os
+from typing import List
+import numpy as np
+import torch
+import torch.nn as nn
+from transformers import (
+    PretrainedConfig,
+    PreTrainedModel,
+    AutoConfig, AutoModelForCausalLM,
+)
+from utils.constants import MISTRAL_7B
+from utils.utils import _get_submodules
+class Cats(nn.Module):
+    def __init__(
+        self,
+        wrapped_module: nn.Module,
+        threshold: float = 0,
+        hist_num_bins: int = 1000,
+        hist_min: int = -1,
+        hist_max: int = 1,
+    ):
+        super(Cats, self).__init__()
+        self.wrapped_module = wrapped_module
+        self.threshold = nn.Parameter(torch.tensor(threshold), requires_grad=False)
+        self.histogram_bins = torch.linspace(hist_min, hist_max, hist_num_bins - 2)
+        self.histogram_bins = torch.cat(
+            [torch.tensor([-torch.inf]), self.histogram_bins, torch.tensor([torch.inf])]
+        )
+        self.hist_counts = torch.zeros(hist_num_bins - 1)
+        self.abs_hist_counts = torch.zeros(hist_num_bins - 1)
+        self.collect_stats = True
+    def disable_collect_stats(self):
+        self.collect_stats = False
+    def enable_collect_stats(self):
+        self.collect_stats = True
+    def set_threshold(self, threshold: float):
+        self.threshold = nn.Parameter(torch.tensor(threshold), requires_grad=False)
+    def forward(self, x):
+        x = self.wrapped_module(x)
+        if self.collect_stats:
+            self.hist_counts += torch.histogram(x, bins=self.histogram_bins)[0]
+            self.abs_hist_counts += torch.histogram(
+                torch.abs(x), bins=self.histogram_bins
+            )[0]
+        x[abs(x) < self.threshold] = 0
+        return x
+# Function to load existing data from a JSON file
+def load_data(file_path):
+    try:
+        with open(file_path, "r") as json_file:
+            return json.load(json_file)
+    except FileNotFoundError:
+        return {}  # Return an empty dictionary if the file does not exist
+# Function to save the dictionary to a JSON file
+def save_to_json(data, file_path):
+    os.makedirs(os.path.dirname(file_path), exist_ok=True)
+    with open(file_path, "w") as json_file:
+        json.dump(data, json_file, indent=4)
+class CatsConfig(PretrainedConfig):
+    model_type = "cats_model"
+    def __init__(
+        self,
+        wrapped_model_config=AutoConfig.from_pretrained(MISTRAL_7B),
+        wrapped_model_class_name: str = "MistralForCausalLM",
+        target_modules: List[str] = ["act_fn"],
+        target_sparsity: float = 0.5,
+        **kwargs,
+    ):
+        self.target_modules = target_modules
+        self.target_sparsity = target_sparsity
+        self.wrapped_model_class_name = wrapped_model_class_name
+        self.__dict__.update(wrapped_model_config.__dict__)
+        super().__init__(**kwargs)
+class CatsModel(PreTrainedModel):
+    config_class = CatsConfig
+    def __init__(self, config, wrapped_model_pretrained_dir: str = None, **kwargs):
+        super().__init__(config)
+        transformers_module = importlib.import_module("transformers")
+        self.wrapped_model_class = getattr(transformers_module, config.wrapped_model_class_name)
+        self.wrapped_model = self.wrapped_model_class(config)
+        if wrapped_model_pretrained_dir is not None:
+            self.wrapped_model = self.wrapped_model_class.from_pretrained(wrapped_model_pretrained_dir)
+        print(self.__dict__)
+        self.inject_cats()
+    def inject_cats(self):
+        for name, module in self.wrapped_model.named_modules():
+            parent, target, target_name = _get_submodules(self.wrapped_model, name)
+            if target_name in self.config.target_modules:
+                print(f"{name} is replaced.")
+                # Replace target module with target module + CATS
+                cats = Cats(wrapped_module=target)
+                setattr(parent, target_name, cats)
+    def enable_collect_stats(self):
+        for module in self.wrapped_model.named_modules():
+            if isinstance(module, Cats):
+                module.enable_collect_stats()
+    def disable_adapters(self) -> None:
+        for module in self.wrapped_model.named_modules():
+            if isinstance(module, Cats):
+                module.disable_collect_stats()
+    # def __getattr__(self, name: str):
+    #     """Forward missing attributes to the wrapped module."""
+    #     try:
+    #         return super().__getattr__(name)  # defer to nn.Module's logic
+    #     except AttributeError:
+    #         return getattr(self.model, name)
+def simple_exp():
+    model_dir = MISTRAL_7B
+    config = AutoConfig.from_pretrained(model_dir)
+    cats_config = CatsConfig(config, wrapped_model_class_name="MistralForCausalLM")
+    model = CatsModel(cats_config, wrapped_model_pretrained_dir=None)
+    print(model)
+    print(model.wrapped_model)
+    print(model.config)
+    CatsConfig.register_for_auto_class()
+    CatsModel.register_for_auto_class("AutoModelForCausalLM")
+    repo_id = "thrunlab/cats_exp"
+    model.push_to_hub(repo_id)
+    model = AutoModelForCausalLM.from_pretrained(repo_id, trust_remote_code=True)
+if __name__ == "__main__":
+    simple_exp()

config.json CHANGED Viewed

@@ -3,6 +3,10 @@
     "CatsModel"
   ],
   "attention_dropout": 0.0,
   "hidden_act": "silu",
   "hidden_size": 4096,
   "initializer_range": 0.02,

     "CatsModel"
   ],
   "attention_dropout": 0.0,
+  "auto_map": {
+    "AutoConfig": "cats.CatsConfig",
+    "AutoModelForCausalLM": "cats.CatsModel"
+  },
   "hidden_act": "silu",
   "hidden_size": 4096,
   "initializer_range": 0.02,

model-00001-of-00006.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:518889943dc9e710f8b6591e913c7828e0b7809753d4ec1425f6361e62819d7c
 size 4987198176

 version https://git-lfs.github.com/spec/v1
+oid sha256:02be45417249d551272c3ee9d68883dedc06937e310525f19a4aa5f079e290a6
 size 4987198176

model-00002-of-00006.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ad42e8e9d7e18c3b36d41e6fdbdfa396fde96ab4805eae83789be3c5edc90bcc
 size 4899117664

 version https://git-lfs.github.com/spec/v1
+oid sha256:421b3cda4afa202632eae121e820d640e18b13cb27b5af248ddc9d68601ed765
 size 4899117664

model-00003-of-00006.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:69d306913ca9bba54934aea5733d22b2f4f5277e6863e82849bd45277b6fae1f
 size 4999814528

 version https://git-lfs.github.com/spec/v1
+oid sha256:42db0dfcfa4abc870330f959fb048964cf663d8b2c3c8aee01da6329f2ab0bd7
 size 4999814528

model-00004-of-00006.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6589abda0af5979e33babc4ebf35ee403e0fac9f2cca16462601fbf2753050f2
 size 4999814528

 version https://git-lfs.github.com/spec/v1
+oid sha256:bc0fe251d811c7d07606bab7cb44dd624c068ae10f240bf1d02a3faec2c5e778
 size 4999814528

model-00005-of-00006.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:11ea95a1c1474e697a4349921d82948c4888ee3a55fe296e0bbaaab4b2a4ac78
 size 4832008712

 version https://git-lfs.github.com/spec/v1
+oid sha256:62446988cc65a531f9291d839567b046ce05c63e290a327018c76c43493f3b8c
 size 4832008712

model-00006-of-00006.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cb6264074829100dcc0a4f76db86aa0e47e9aaa91fe5e74f5c3d696ef93aee02
-size 3724727904

 version https://git-lfs.github.com/spec/v1
+oid sha256:a7d274e4390c9336160ddb433d1fb9165ba660269129305ff94b6746998a9088
+size 4249016024

model.safetensors.index.json CHANGED Viewed

@@ -1,8 +1,9 @@
 {
   "metadata": {
-    "total_size": 28442640640
   },
   "weight_map": {
     "wrapped_model.model.embed_tokens.weight": "model-00001-of-00006.safetensors",
     "wrapped_model.model.layers.0.input_layernorm.weight": "model-00001-of-00006.safetensors",
     "wrapped_model.model.layers.0.mlp.act_fn.threshold": "model-00001-of-00006.safetensors",

 {
   "metadata": {
+    "total_size": 28966928640
   },
   "weight_map": {
+    "wrapped_model.lm_head.weight": "model-00006-of-00006.safetensors",
     "wrapped_model.model.embed_tokens.weight": "model-00001-of-00006.safetensors",
     "wrapped_model.model.layers.0.input_layernorm.weight": "model-00001-of-00006.safetensors",
     "wrapped_model.model.layers.0.mlp.act_fn.threshold": "model-00001-of-00006.safetensors",