Text Generation
Transformers
ONNX
llama
sparse
code
deepsparse
Abhinav Agarwalla committed on
Commit
3f690cf
1 Parent(s): c8df07f

Updating pruned70+quantized model

Browse files
README.md CHANGED
@@ -50,8 +50,8 @@ Model evaluation metrics and results.
50
 
51
  | Benchmark | Metric | Llama-2-7b-evolcodealpaca | Llama-2-7b-pruned70-retrained-evolcodealpaca-quant-ds |
52
  |------------------------------------------------|---------------|-------------|-------------------------------|
53
- | [HumanEval](https://arxiv.org/abs/2107.03374) | pass@1 | 32.03 | 34.76 |
54
 
55
  ## Help
56
 
57
- For further support, and discussions on these models and AI in general, join [Neural Magic's Slack Community](https://join.slack.com/t/discuss-neuralmagic/shared_invite/zt-q1a1cnvo-YBoICSIw3L1dmQpjBeDurQ)
 
50
 
51
  | Benchmark | Metric | Llama-2-7b-evolcodealpaca | Llama-2-7b-pruned70-retrained-evolcodealpaca-quant-ds |
52
  |------------------------------------------------|---------------|-------------|-------------------------------|
53
+ | [HumanEval](https://arxiv.org/abs/2107.03374) | pass@1 | 32.03 | 35.02 |
54
 
55
  ## Help
56
 
57
+ For further support, and discussions on these models and AI in general, join [Neural Magic's Slack Community](https://join.slack.com/t/discuss-neuralmagic/shared_invite/zt-q1a1cnvo-YBoICSIw3L1dmQpjBeDurQ)
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/home/abhinav/src/llama-recipes/llama_7b_evol_codealpaca_sparse/sparse-sft_cerebras_70sp_uniform_llama_lr5e-4_epochs1_gradclipFalse_cosine_nodistillation-/combined/",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
@@ -21,8 +21,9 @@
21
  "rope_scaling": null,
22
  "rope_theta": 10000.0,
23
  "tie_word_embeddings": false,
24
- "torch_dtype": "float32",
25
- "transformers_version": "1.7.0.20240313",
 
26
  "use_cache": true,
27
  "vocab_size": 32000
28
  }
 
1
  {
2
+ "_name_or_path": "neuralmagic/Llama-2-7b-pruned70-retrained-evolcodealpaca-quant-ds",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
 
21
  "rope_scaling": null,
22
  "rope_theta": 10000.0,
23
  "tie_word_embeddings": false,
24
+ "tokenizer_class": "LlamaTokenizerFast",
25
+ "torch_dtype": "float16",
26
+ "transformers_version": "1.7.0.20240504",
27
  "use_cache": true,
28
  "vocab_size": 32000
29
  }
model-orig.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80041fb9222105cf527dd41b05dd62bfea5d0a4632c3ecc11d51bd688dee2525
3
- size 1222296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e88bf46725811cb455886e4fa944eb5c309512b8793bc4d219e505ab5aaa18b
3
+ size 1222611
model.data CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea54ad705a5d602bedd74eb1bf6855f0f109c1b275b149361ce8a0d126630533
3
  size 7154772992
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:365753b3f8cd015b79c51aca5caa0e56d208f731a5f4599720ee6f9be3c6f82b
3
  size 7154772992
model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d208294086849085cced1db1ae81ba4720f449452e1c0eadfc8ec6234170df3
3
- size 1207241
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0583ad67585c0fd1758a3d3845814bf5158a5cdbb928b9999f98177b7cf6534
3
+ size 1207556
special_tokens_map.json CHANGED
@@ -13,13 +13,7 @@
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
- "pad_token": {
17
- "content": "</s>",
18
- "lstrip": false,
19
- "normalized": false,
20
- "rstrip": false,
21
- "single_word": false
22
- },
23
  "unk_token": {
24
  "content": "<unk>",
25
  "lstrip": false,
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
+ "pad_token": "</s>",
 
 
 
 
 
 
17
  "unk_token": {
18
  "content": "<unk>",
19
  "lstrip": false,
tokenizer.json CHANGED
@@ -2,7 +2,7 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 2048,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 384,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
tokenizer_config.json CHANGED
@@ -29,15 +29,11 @@
29
  "clean_up_tokenization_spaces": false,
30
  "eos_token": "</s>",
31
  "legacy": false,
32
- "max_length": 2048,
33
  "model_max_length": 1000000000000000019884624838656,
34
  "pad_token": "</s>",
35
  "padding_side": "right",
36
  "sp_model_kwargs": {},
37
- "stride": 0,
38
  "tokenizer_class": "LlamaTokenizer",
39
- "truncation_side": "right",
40
- "truncation_strategy": "longest_first",
41
  "unk_token": "<unk>",
42
  "use_default_system_prompt": false
43
  }
 
29
  "clean_up_tokenization_spaces": false,
30
  "eos_token": "</s>",
31
  "legacy": false,
 
32
  "model_max_length": 1000000000000000019884624838656,
33
  "pad_token": "</s>",
34
  "padding_side": "right",
35
  "sp_model_kwargs": {},
 
36
  "tokenizer_class": "LlamaTokenizer",
 
 
37
  "unk_token": "<unk>",
38
  "use_default_system_prompt": false
39
  }