willtensora committed on
Commit
128df71
·
verified ·
1 Parent(s): 23d48b4

Training in progress, epoch 0

Browse files
adapter_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
- "base_model_name_or_path": "oopsung/llama2-7b-n-ox-test-v1",
5
  "bias": "none",
6
  "fan_in_fan_out": null,
7
  "inference_mode": true,
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "o_proj",
24
- "v_proj",
25
- "up_proj",
26
  "q_proj",
27
- "down_proj",
 
28
  "gate_proj",
29
- "k_proj"
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
+ "base_model_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
5
  "bias": "none",
6
  "fan_in_fan_out": null,
7
  "inference_mode": true,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
 
 
23
  "q_proj",
24
+ "v_proj",
25
+ "o_proj",
26
  "gate_proj",
27
+ "up_proj",
28
+ "k_proj",
29
+ "down_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e72b4a3235c259913087ac0799deb5f0a9ce95fa5a2731f32072330633125a2
3
- size 80013120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91a9ea1223ba85bbea9348c544be11652f89e0e757979feed1bcb01f6c918891
3
+ size 25271744
config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "_attn_implementation_autoset": true,
3
- "_name_or_path": "oopsung/llama2-7b-n-ox-test-v1",
4
  "architectures": [
5
  "LlamaForCausalLM"
6
  ],
@@ -8,26 +8,24 @@
8
  "attention_dropout": 0.0,
9
  "bos_token_id": 1,
10
  "eos_token_id": 2,
11
- "head_dim": 128,
12
  "hidden_act": "silu",
13
- "hidden_size": 4096,
14
  "initializer_range": 0.02,
15
- "intermediate_size": 11008,
16
- "max_length": 4096,
17
  "max_position_embeddings": 2048,
18
  "mlp_bias": false,
19
  "model_type": "llama",
20
  "num_attention_heads": 32,
21
- "num_hidden_layers": 32,
22
- "num_key_value_heads": 32,
23
- "pad_token_id": 0,
24
  "pretraining_tp": 1,
25
  "rms_norm_eps": 1e-05,
26
  "rope_scaling": null,
27
  "rope_theta": 10000.0,
28
  "tie_word_embeddings": false,
29
- "torch_dtype": "float16",
30
  "transformers_version": "4.46.0",
31
  "use_cache": false,
32
- "vocab_size": 46336
33
  }
 
1
  {
2
  "_attn_implementation_autoset": true,
3
+ "_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
4
  "architectures": [
5
  "LlamaForCausalLM"
6
  ],
 
8
  "attention_dropout": 0.0,
9
  "bos_token_id": 1,
10
  "eos_token_id": 2,
11
+ "head_dim": 64,
12
  "hidden_act": "silu",
13
+ "hidden_size": 2048,
14
  "initializer_range": 0.02,
15
+ "intermediate_size": 5632,
 
16
  "max_position_embeddings": 2048,
17
  "mlp_bias": false,
18
  "model_type": "llama",
19
  "num_attention_heads": 32,
20
+ "num_hidden_layers": 22,
21
+ "num_key_value_heads": 4,
 
22
  "pretraining_tp": 1,
23
  "rms_norm_eps": 1e-05,
24
  "rope_scaling": null,
25
  "rope_theta": 10000.0,
26
  "tie_word_embeddings": false,
27
+ "torch_dtype": "bfloat16",
28
  "transformers_version": "4.46.0",
29
  "use_cache": false,
30
+ "vocab_size": 32000
31
  }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
tokenizer_config.json CHANGED
@@ -26,46 +26,6 @@
26
  "rstrip": false,
27
  "single_word": false,
28
  "special": true
29
- },
30
- "46331": {
31
- "content": "<|sep|>",
32
- "lstrip": false,
33
- "normalized": false,
34
- "rstrip": false,
35
- "single_word": false,
36
- "special": true
37
- },
38
- "46332": {
39
- "content": "<|endoftext|>",
40
- "lstrip": false,
41
- "normalized": false,
42
- "rstrip": false,
43
- "single_word": false,
44
- "special": true
45
- },
46
- "46333": {
47
- "content": "<|acc|>",
48
- "lstrip": false,
49
- "normalized": false,
50
- "rstrip": false,
51
- "single_word": false,
52
- "special": true
53
- },
54
- "46334": {
55
- "content": "<|rrn|>",
56
- "lstrip": false,
57
- "normalized": false,
58
- "rstrip": false,
59
- "single_word": false,
60
- "special": true
61
- },
62
- "46335": {
63
- "content": "<|tel|>",
64
- "lstrip": false,
65
- "normalized": false,
66
- "rstrip": false,
67
- "single_word": false,
68
- "special": true
69
  }
70
  },
71
  "bos_token": "<s>",
@@ -73,8 +33,9 @@
73
  "clean_up_tokenization_spaces": false,
74
  "eos_token": "</s>",
75
  "legacy": false,
76
- "model_max_length": 1000000000000000019884624838656,
77
  "pad_token": "</s>",
 
78
  "sp_model_kwargs": {},
79
  "tokenizer_class": "LlamaTokenizer",
80
  "unk_token": "<unk>",
 
26
  "rstrip": false,
27
  "single_word": false,
28
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  }
30
  },
31
  "bos_token": "<s>",
 
33
  "clean_up_tokenization_spaces": false,
34
  "eos_token": "</s>",
35
  "legacy": false,
36
+ "model_max_length": 2048,
37
  "pad_token": "</s>",
38
+ "padding_side": "right",
39
  "sp_model_kwargs": {},
40
  "tokenizer_class": "LlamaTokenizer",
41
  "unk_token": "<unk>",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0a0205286a7bd4fca07ce15cfa80889162bf3c590f02085703f2bd7a8a716c0
3
  size 6776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f7cf9b3f4a9b6aee0af9bb352b3071b1f748975269b94023838baf634daee8c
3
  size 6776