sharpenb committed on
Commit
a62ae42
1 Parent(s): b3f5061

Upload folder using huggingface_hub (#7)


- b533d6edf83515a50a2ad358631dfa9704acb42768f25feb96837ae7f735c2d8 (d68d8e5f123b0a0e01dcef329a1822f771220d42)

README.md CHANGED
@@ -1,5 +1,6 @@
 ---
 thumbnail: "https://assets-global.website-files.com/646b351987a8d8ce158d1940/64ec9e96b4334c0e1ac41504_Logo%20with%20white%20text.svg"
+base_model: facebook/opt-125m
 metrics:
 - memory_disk
 - memory_inference
@@ -59,9 +60,9 @@ You can run the smashed model with these steps:
 2. Load & run the model.
 ```python
 from transformers import AutoModelForCausalLM, AutoTokenizer
-
+from awq import AutoAWQForCausalLM
 
-model = AutoModelForCausalLM.from_pretrained("PrunaAI/facebook-opt-125m-AWQ-4bit-smashed", trust_remote_code=True, device_map='auto')
+model = AutoAWQForCausalLM.from_quantized("PrunaAI/facebook-opt-125m-AWQ-4bit-smashed", trust_remote_code=True, device_map='auto')
 tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")
 
 input_ids = tokenizer("What is the color of prunes?,", return_tensors='pt').to(model.device)["input_ids"]
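
For reference, a minimal end-to-end sketch of the loading path this hunk switches to. The `from_quantized` call and the tokenizer/`input_ids` lines come from the hunk itself; the `generate` and `decode` calls are assumptions about how the README snippet continues and are not part of the diff above.

```python
# Sketch of the updated README usage: load the AWQ-smashed model via autoawq
# instead of plain transformers, then generate from the example prompt.
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

model = AutoAWQForCausalLM.from_quantized(
    "PrunaAI/facebook-opt-125m-AWQ-4bit-smashed",
    trust_remote_code=True,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")

input_ids = tokenizer("What is the color of prunes?,", return_tensors="pt").to(model.device)["input_ids"]

# Assumed continuation (not shown in the hunk): generate and decode a completion.
outputs = model.generate(input_ids, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```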
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "/tmp/tmpzu_5c0ni",
+  "_name_or_path": "/tmp/tmpm3p07smi",
   "_remove_final_layer_norm": false,
   "activation_dropout": 0.0,
   "activation_function": "relu",
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
results.json CHANGED
@@ -1,22 +1,6 @@
 {
  "base_current_gpu_type": "NVIDIA A100-PCIE-40GB",
  "base_current_gpu_total_memory": 40339.3125,
- "base_memory_inference_first": 690.0,
- "base_memory_inference": 570.0,
- "base_token_generation_latency_sync": 25.73595085144043,
- "base_token_generation_latency_async": 25.555139780044556,
- "base_token_generation_throughput_sync": 0.03885615129483473,
- "base_token_generation_throughput_async": 0.03913107142465634,
- "base_token_generation_CO2_emissions": 7.04025152217974e-06,
- "base_token_generation_energy_consumption": 0.00201063437593726,
  "smashed_current_gpu_type": "NVIDIA A100-PCIE-40GB",
- "smashed_current_gpu_total_memory": 40339.3125,
- "smashed_memory_inference_first": 164.0,
- "smashed_memory_inference": 206.0,
- "smashed_token_generation_latency_sync": 20.38736572265625,
- "smashed_token_generation_latency_async": 21.09651416540146,
- "smashed_token_generation_throughput_sync": 0.04904998583945111,
- "smashed_token_generation_throughput_async": 0.047401195863912546,
- "smashed_token_generation_CO2_emissions": 6.962162215496025e-06,
- "smashed_token_generation_energy_consumption": 0.001568448312001272
+ "smashed_current_gpu_total_memory": 40339.3125
 }
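
The removed fields still record the measured base-vs-smashed gap. As a rough reading of those numbers (latency assumed to be milliseconds per token, memory assumed to be MB), the sync speedup works out to about 1.26x and the inference-memory ratio to about 2.77x:

```python
# Back-of-the-envelope comparison using the benchmark values removed in this commit.
base_latency_sync = 25.73595085144043      # base_token_generation_latency_sync
smashed_latency_sync = 20.38736572265625   # smashed_token_generation_latency_sync
base_memory_inference = 570.0              # base_memory_inference (MB assumed)
smashed_memory_inference = 206.0           # smashed_memory_inference (MB assumed)

print(f"sync token-generation speedup: {base_latency_sync / smashed_latency_sync:.2f}x")   # ~1.26x
print(f"inference memory ratio:        {base_memory_inference / smashed_memory_inference:.2f}x")  # ~2.77x
```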
smash_config.json CHANGED
@@ -14,7 +14,7 @@
  "controlnet": "None",
  "unet_dim": 4,
  "device": "cuda",
- "cache_dir": "/ceph/hdd/staff/charpent/.cache/modelsmqfybacy",
+ "cache_dir": "/ceph/hdd/staff/charpent/.cache/modelsx1vv2b7p",
  "batch_size": 1,
  "tokenizer": "GPT2TokenizerFast(name_or_path='facebook/opt-125m', vocab_size=50265, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '</s>', 'eos_token': '</s>', 'unk_token': '</s>', 'pad_token': '<pad>'}, clean_up_tokenization_spaces=True), added_tokens_decoder={\n\t1: AddedToken(\"<pad>\", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),\n\t2: AddedToken(\"</s>\", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),\n}",
  "task": "text_text_generation",
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+{
+  "bos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}
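
The newly added map mirrors the special tokens the tokenizer already reports (see the `special_tokens=` repr in smash_config.json above); a small sketch to confirm them after loading:

```python
# Sketch: the special tokens declared in special_tokens_map.json should match
# what AutoTokenizer exposes for the base tokenizer.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")
assert tokenizer.bos_token == "</s>"
assert tokenizer.eos_token == "</s>"
assert tokenizer.unk_token == "</s>"
assert tokenizer.pad_token == "<pad>"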
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,31 @@
+{
+  "add_bos_token": true,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "</s>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "</s>",
+  "errors": "replace",
+  "legacy": false,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<pad>",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "</s>"
+}
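
One practical consequence of `"add_bos_token": true` together with the `added_tokens_decoder` entries above: encoded sequences start with the BOS token `</s>` (id 2), and id 1 is reserved for `<pad>`. A quick sketch, assuming the behaviour matches the stock facebook/opt-125m tokenizer:

```python
# Sketch: with add_bos_token=true the GPT2Tokenizer-based OPT tokenizer prepends
# the BOS token (</s>, id 2) to every encoded sequence; <pad> is id 1.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")
ids = tokenizer("What is the color of prunes?,")["input_ids"]
assert ids[0] == tokenizer.bos_token_id == 2
assert tokenizer.pad_token_id == 1
```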
vocab.json ADDED
The diff for this file is too large to render. See raw diff