AmberYifan committed on
Commit
5c00897
1 Parent(s): 322ebe5

Training in progress, step 100

Browse files
cl100k_base.tiktoken ADDED
The diff for this file is too large to render. See raw diff
 
config.json ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/Phi-3-small-8k-instruct",
3
+ "architectures": [
4
+ "Phi3SmallForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout_prob": 0.0,
8
+ "auto_map": {
9
+ "AutoConfig": "microsoft/Phi-3-small-8k-instruct--configuration_phi3_small.Phi3SmallConfig",
10
+ "AutoModelForCausalLM": "microsoft/Phi-3-small-8k-instruct--modeling_phi3_small.Phi3SmallForCausalLM",
11
+ "AutoModelForSequenceClassification": "microsoft/Phi-3-small-8k-instruct--modeling_phi3_small.Phi3SmallForSequenceClassification",
12
+ "AutoTokenizer": "microsoft/Phi-3-small-8k-instruct--tokenization_phi3_small.Phi3SmallTokenizer"
13
+ },
14
+ "blocksparse_block_size": 64,
15
+ "blocksparse_homo_head_pattern": false,
16
+ "blocksparse_num_local_blocks": 16,
17
+ "blocksparse_triton_kernel_block_size": 64,
18
+ "blocksparse_vert_stride": 8,
19
+ "bos_token_id": 100257,
20
+ "dense_attention_every_n_layers": 2,
21
+ "dummy_token_indices": [
22
+ 100256,
23
+ 100258,
24
+ 100259,
25
+ 100260,
26
+ 100264,
27
+ 100265,
28
+ 100267,
29
+ 100268,
30
+ 100269,
31
+ 100270,
32
+ 100271,
33
+ 100272,
34
+ 100273,
35
+ 100274,
36
+ 100275,
37
+ 100276,
38
+ 100277,
39
+ 100278,
40
+ 100279,
41
+ 100280,
42
+ 100281,
43
+ 100282,
44
+ 100283,
45
+ 100284,
46
+ 100285,
47
+ 100286,
48
+ 100287,
49
+ 100288,
50
+ 100289,
51
+ 100290,
52
+ 100291,
53
+ 100292,
54
+ 100293,
55
+ 100294,
56
+ 100295,
57
+ 100296,
58
+ 100297,
59
+ 100298,
60
+ 100299,
61
+ 100300,
62
+ 100301,
63
+ 100302,
64
+ 100303,
65
+ 100304,
66
+ 100305,
67
+ 100306,
68
+ 100307,
69
+ 100308,
70
+ 100309,
71
+ 100310,
72
+ 100311,
73
+ 100312,
74
+ 100313,
75
+ 100314,
76
+ 100315,
77
+ 100316,
78
+ 100317,
79
+ 100318,
80
+ 100319,
81
+ 100320,
82
+ 100321,
83
+ 100322,
84
+ 100323,
85
+ 100324,
86
+ 100325,
87
+ 100326,
88
+ 100327,
89
+ 100328,
90
+ 100329,
91
+ 100330,
92
+ 100331,
93
+ 100332,
94
+ 100333,
95
+ 100334,
96
+ 100335,
97
+ 100336,
98
+ 100337,
99
+ 100338,
100
+ 100339,
101
+ 100340,
102
+ 100341,
103
+ 100342,
104
+ 100343,
105
+ 100344,
106
+ 100345,
107
+ 100346,
108
+ 100347,
109
+ 100348,
110
+ 100349,
111
+ 100350,
112
+ 100351
113
+ ],
114
+ "embedding_dropout_prob": 0.1,
115
+ "eos_token_id": 100257,
116
+ "ff_dim_multiplier": null,
117
+ "ff_intermediate_size": 14336,
118
+ "ffn_dropout_prob": 0.1,
119
+ "gegelu_limit": 20.0,
120
+ "gegelu_pad_to_256": true,
121
+ "hidden_act": "gegelu",
122
+ "hidden_size": 4096,
123
+ "initializer_range": 0.02,
124
+ "layer_norm_epsilon": 1e-05,
125
+ "max_position_embeddings": 8192,
126
+ "model_type": "phi3small",
127
+ "mup_attn_multiplier": 1.0,
128
+ "mup_embedding_multiplier": 10.0,
129
+ "mup_use_scaling": true,
130
+ "mup_width_multiplier": 8.0,
131
+ "num_attention_heads": 32,
132
+ "num_hidden_layers": 32,
133
+ "num_key_value_heads": 8,
134
+ "pad_sequence_to_multiple_of_64": true,
135
+ "reorder_and_upcast_attn": false,
136
+ "rope_embedding_base": 1000000,
137
+ "rope_position_scale": 1.0,
138
+ "rope_scaling": null,
139
+ "torch_dtype": "bfloat16",
140
+ "transformers_version": "4.37.0",
141
+ "use_cache": false,
142
+ "vocab_size": 100352
143
+ }
runs/Jul23_17-49-20_gilbreth-j001.rcac.purdue.edu/events.out.tfevents.1721771498.gilbreth-j001.rcac.purdue.edu.110065.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f46ee8621ee5ed46c8d8dbc895e511ea023a9134797af1c14bbb7d7515e2f5d
3
+ size 13632
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>"
5
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_commit_hash": "69caae1f2acea34b26f535fecb1f2abb9a304695",
3
+ "_from_auto": true,
4
+ "added_tokens_decoder": {},
5
+ "auto_map": {
6
+ "AutoTokenizer": [
7
+ "tokenization_phi3_small.Phi3SmallTokenizer",
8
+ "tokenization_phi3_small.Phi3SmallTokenizer"
9
+ ]
10
+ },
11
+ "bos_token": "<|endoftext|>",
12
+ "chat_template": "{{ bos_token }}{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
13
+ "clean_up_tokenization_spaces": true,
14
+ "eos_token": "<|endoftext|>",
15
+ "model_max_length": 8192,
16
+ "pad_token": "<|endoftext|>",
17
+ "revision": "main",
18
+ "tokenizer_class": "Phi3SmallTokenizer"
19
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec43a1c59490d295a7873606e2a157d32f9f67206fb3c59e8f50dd416e2b5888
3
+ size 5880