sharpenb committed on
Commit
4fcbf4f
1 Parent(s): b953b6a

3673bf03b7a9f9fa2e3567207300de5642338e24f9c58871ad37d5783beb80b9

Browse files
Files changed (3) hide show
  1. README.md +4 -1
  2. config.json +2 -2
  3. smash_config.json +1 -1
README.md CHANGED
@@ -50,7 +50,10 @@ metrics:
50
  You can run the smashed model with these steps:
51
 
52
  0. Check that the requirements from the original repo facebook/opt-125m are installed. In particular, check the python, cuda, and transformers versions.
53
- 1.
 
 
 
54
  2. Load & run the model.
55
  ```python
56
  from transformers import AutoModelForCausalLM, AutoTokenizer
 
50
  You can run the smashed model with these steps:
51
 
52
  0. Check that the requirements from the original repo facebook/opt-125m are installed. In particular, check the python, cuda, and transformers versions.
53
+ 1. Make sure that you have installed the quantization-related packages.
54
+ ```bash
55
+ pip install transformers accelerate "bitsandbytes>0.37.0"
56
+ ```
57
  2. Load & run the model.
58
  ```python
59
  from transformers import AutoModelForCausalLM, AutoTokenizer
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/tmp/tmpu5cuq6lh",
3
  "_remove_final_layer_norm": false,
4
  "activation_dropout": 0.0,
5
  "activation_function": "relu",
@@ -24,7 +24,7 @@
24
  "pad_token_id": 1,
25
  "prefix": "</s>",
26
  "quantization_config": {
27
- "bnb_4bit_compute_dtype": "float32",
28
  "bnb_4bit_quant_type": "fp4",
29
  "bnb_4bit_use_double_quant": true,
30
  "llm_int8_enable_fp32_cpu_offload": false,
 
1
  {
2
+ "_name_or_path": "/tmp/tmpel47pjzp",
3
  "_remove_final_layer_norm": false,
4
  "activation_dropout": 0.0,
5
  "activation_function": "relu",
 
24
  "pad_token_id": 1,
25
  "prefix": "</s>",
26
  "quantization_config": {
27
+ "bnb_4bit_compute_dtype": "bfloat16",
28
  "bnb_4bit_quant_type": "fp4",
29
  "bnb_4bit_use_double_quant": true,
30
  "llm_int8_enable_fp32_cpu_offload": false,
smash_config.json CHANGED
@@ -8,7 +8,7 @@
8
  "compilers": "[]",
9
  "task": "text_text_generation",
10
  "device": "cuda",
11
- "cache_dir": "/ceph/hdd/staff/charpent/.cache/modelsfrylfbzs",
12
  "batch_size": 1,
13
  "tokenizer": "GPT2TokenizerFast(name_or_path='facebook/opt-125m', vocab_size=50265, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '</s>', 'eos_token': '</s>', 'unk_token': '</s>', 'pad_token': '<pad>'}, clean_up_tokenization_spaces=True), added_tokens_decoder={\n\t1: AddedToken(\"<pad>\", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),\n\t2: AddedToken(\"</s>\", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),\n}",
14
  "model_config": "{'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['OPTForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': '</s>', 'bos_token_id': 2, 'pad_token_id': 1, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'facebook/opt-125m', 'transformers_version': '4.37.1', 'activation_dropout': 0.0, 'model_type': 'opt', 'vocab_size': 50272, 'max_position_embeddings': 2048, 'num_attention_heads': 12, 'word_embed_proj_dim': 768, 'ffn_dim': 3072, 'hidden_size': 768, 'num_hidden_layers': 12, 'dropout': 0.1, 'attention_dropout': 0.0, 'activation_function': 'relu', 'init_std': 0.02, 'layerdrop': 0.0, 'use_cache': True, 'do_layer_norm_before': True, 'enable_bias': True, 'layer_norm_elementwise_affine': True, '_remove_final_layer_norm': False}",
 
8
  "compilers": "[]",
9
  "task": "text_text_generation",
10
  "device": "cuda",
11
+ "cache_dir": "/ceph/hdd/staff/charpent/.cache/models02q0690g",
12
  "batch_size": 1,
13
  "tokenizer": "GPT2TokenizerFast(name_or_path='facebook/opt-125m', vocab_size=50265, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '</s>', 'eos_token': '</s>', 'unk_token': '</s>', 'pad_token': '<pad>'}, clean_up_tokenization_spaces=True), added_tokens_decoder={\n\t1: AddedToken(\"<pad>\", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),\n\t2: AddedToken(\"</s>\", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),\n}",
14
  "model_config": "{'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['OPTForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': '</s>', 'bos_token_id': 2, 'pad_token_id': 1, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'facebook/opt-125m', 'transformers_version': '4.37.1', 'activation_dropout': 0.0, 'model_type': 'opt', 'vocab_size': 50272, 'max_position_embeddings': 2048, 'num_attention_heads': 12, 'word_embed_proj_dim': 768, 'ffn_dim': 3072, 'hidden_size': 768, 'num_hidden_layers': 12, 'dropout': 0.1, 'attention_dropout': 0.0, 'activation_function': 'relu', 'init_std': 0.02, 'layerdrop': 0.0, 'use_cache': True, 'do_layer_norm_before': True, 'enable_bias': True, 'layer_norm_elementwise_affine': True, '_remove_final_layer_norm': False}",