sharpenb committed on
Commit
a62ae42
1 Parent(s): b3f5061

Upload folder using huggingface_hub (#7)


- b533d6edf83515a50a2ad358631dfa9704acb42768f25feb96837ae7f735c2d8 (d68d8e5f123b0a0e01dcef329a1822f771220d42)

README.md CHANGED
@@ -1,5 +1,6 @@
 ---
 thumbnail: "https://assets-global.website-files.com/646b351987a8d8ce158d1940/64ec9e96b4334c0e1ac41504_Logo%20with%20white%20text.svg"
+base_model: facebook/opt-125m
 metrics:
 - memory_disk
 - memory_inference
@@ -59,9 +60,9 @@ You can run the smashed model with these steps:
 2. Load & run the model.
 ```python
 from transformers import AutoModelForCausalLM, AutoTokenizer
-
+from awq import AutoAWQForCausalLM
 
-model = AutoModelForCausalLM.from_pretrained("PrunaAI/facebook-opt-125m-AWQ-4bit-smashed", trust_remote_code=True, device_map='auto')
+model = AutoAWQForCausalLM.from_quantized("PrunaAI/facebook-opt-125m-AWQ-4bit-smashed", trust_remote_code=True, device_map='auto')
 tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")
 
 input_ids = tokenizer("What is the color of prunes?,", return_tensors='pt').to(model.device)["input_ids"]
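
For reference, a minimal end-to-end sketch of the loading path this hunk switches to. The `from_quantized` call and the tokenizer/`input_ids` lines come from the hunk itself; the `generate` and `decode` calls are assumptions about how the README snippet continues and are not part of the diff above.

```python
# Sketch of the updated README usage: load the AWQ-smashed model via autoawq
# instead of plain transformers, then generate from the example prompt.
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

model = AutoAWQForCausalLM.from_quantized(
    "PrunaAI/facebook-opt-125m-AWQ-4bit-smashed",
    trust_remote_code=True,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")

input_ids = tokenizer("What is the color of prunes?,", return_tensors="pt").to(model.device)["input_ids"]

# Assumed continuation (not shown in the hunk): generate and decode a completion.
outputs = model.generate(input_ids, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```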
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "/tmp/tmpzu_5c0ni",
+  "_name_or_path": "/tmp/tmpm3p07smi",
   "_remove_final_layer_norm": false,
   "activation_dropout": 0.0,
   "activation_function": "relu",
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
results.json CHANGED
@@ -1,22 +1,6 @@
 {
  "base_current_gpu_type": "NVIDIA A100-PCIE-40GB",
  "base_current_gpu_total_memory": 40339.3125,
- "base_memory_inference_first": 690.0,
- "base_memory_inference": 570.0,
- "base_token_generation_latency_sync": 25.73595085144043,
- "base_token_generation_latency_async": 25.555139780044556,
- "base_token_generation_throughput_sync": 0.03885615129483473,
- "base_token_generation_throughput_async": 0.03913107142465634,
- "base_token_generation_CO2_emissions": 7.04025152217974e-06,
- "base_token_generation_energy_consumption": 0.00201063437593726,
  "smashed_current_gpu_type": "NVIDIA A100-PCIE-40GB",
- "smashed_current_gpu_total_memory": 40339.3125,
- "smashed_memory_inference_first": 164.0,
- "smashed_memory_inference": 206.0,
- "smashed_token_generation_latency_sync": 20.38736572265625,
- "smashed_token_generation_latency_async": 21.09651416540146,
- "smashed_token_generation_throughput_sync": 0.04904998583945111,
- "smashed_token_generation_throughput_async": 0.047401195863912546,
- "smashed_token_generation_CO2_emissions": 6.962162215496025e-06,
- "smashed_token_generation_energy_consumption": 0.001568448312001272
+ "smashed_current_gpu_total_memory": 40339.3125
 }
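
The removed fields still record the measured base-vs-smashed gap. As a rough reading of those numbers (latency assumed to be milliseconds per token, memory assumed to be MB), the sync speedup works out to about 1.26x and the inference-memory ratio to about 2.77x:

```python
# Back-of-the-envelope comparison using the benchmark values removed in this commit.
base_latency_sync = 25.73595085144043      # base_token_generation_latency_sync
smashed_latency_sync = 20.38736572265625   # smashed_token_generation_latency_sync
base_memory_inference = 570.0              # base_memory_inference (MB assumed)
smashed_memory_inference = 206.0           # smashed_memory_inference (MB assumed)

print(f"sync token-generation speedup: {base_latency_sync / smashed_latency_sync:.2f}x")   # ~1.26x
print(f"inference memory ratio:        {base_memory_inference / smashed_memory_inference:.2f}x")  # ~2.77x
```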
smash_config.json CHANGED
@@ -14,7 +14,7 @@
  "controlnet": "None",
  "unet_dim": 4,
  "device": "cuda",
- "cache_dir": "/ceph/hdd/staff/charpent/.cache/modelsmqfybacy",
+ "cache_dir": "/ceph/hdd/staff/charpent/.cache/modelsx1vv2b7p",
  "batch_size": 1,
  "tokenizer": "GPT2TokenizerFast(name_or_path='facebook/opt-125m', vocab_size=50265, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '</s>', 'eos_token': '</s>', 'unk_token': '</s>', 'pad_token': '<pad>'}, clean_up_tokenization_spaces=True), added_tokens_decoder={\n\t1: AddedToken(\"<pad>\", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),\n\t2: AddedToken(\"</s>\", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),\n}",
  "task": "text_text_generation",
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+{
+  "bos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}
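
The newly added map mirrors the special tokens the tokenizer already reports (see the `special_tokens=` repr in smash_config.json above); a small sketch to confirm them after loading:

```python
# Sketch: the special tokens declared in special_tokens_map.json should match
# what AutoTokenizer exposes for the base tokenizer.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")
assert tokenizer.bos_token == "</s>"
assert tokenizer.eos_token == "</s>"
assert tokenizer.unk_token == "</s>"
assert tokenizer.pad_token == "<pad>"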
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,31 @@
+{
+  "add_bos_token": true,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "</s>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "</s>",
+  "errors": "replace",
+  "legacy": false,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<pad>",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "</s>"
+}
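
One practical consequence of `"add_bos_token": true` together with the `added_tokens_decoder` entries above: encoded sequences start with the BOS token `</s>` (id 2), and id 1 is reserved for `<pad>`. A quick sketch, assuming the behaviour matches the stock facebook/opt-125m tokenizer:

```python
# Sketch: with add_bos_token=true the GPT2Tokenizer-based OPT tokenizer prepends
# the BOS token (</s>, id 2) to every encoded sequence; <pad> is id 1.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")
ids = tokenizer("What is the color of prunes?,")["input_ids"]
assert ids[0] == tokenizer.bos_token_id == 2
assert tokenizer.pad_token_id == 1
```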
vocab.json ADDED
The diff for this file is too large to render. See raw diff