Commit 43bce61 by dvitel
1 parent: 74a8e9b

Training in progress, step 1600
.gitignore ADDED
@@ -0,0 +1 @@
+ checkpoint-*/
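
This ignore pattern keeps the intermediate checkpoint-*/ directories that the transformers Trainer writes during training out of the repo; only the top-level snapshot gets pushed, which is what the "Training in progress, step 1600" commit message refers to. A minimal sketch of a Trainer setup that produces this layout; the hyperparameter values are illustrative assumptions, not taken from this commit (the real ones are pickled in training_args.bin at the bottom):

```python
from transformers import AutoModelForCausalLM, Trainer, TrainingArguments

# Illustrative values only; the committed ones live in training_args.bin.
model = AutoModelForCausalLM.from_pretrained("distilgpt2")
args = TrainingArguments(
    output_dir="out",   # Trainer writes out/checkpoint-400, -800, ... here
    save_steps=400,     # assumed cadence; would eventually yield checkpoint-1600
    push_to_hub=True,   # each save pushes a "Training in progress, step N" commit
)
# Trainer(model=model, args=args, train_dataset=...).train()
```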
added_tokens.json ADDED
@@ -0,0 +1,168 @@
+ {
+ "[CLS0]": 50269,
+ "[CLS1]": 50265,
+ "[CLS2]": 50377,
+ "[CLSN]": 50257,
+ "[INIT]": 50258,
+ "[NOARG]": 50259,
+ "[v0]": 50409,
+ "[v100]": 50381,
+ "[v101]": 50389,
+ "[v102]": 50400,
+ "[v103]": 50309,
+ "[v104]": 50301,
+ "[v105]": 50348,
+ "[v106]": 50297,
+ "[v107]": 50310,
+ "[v108]": 50390,
+ "[v109]": 50338,
+ "[v10]": 50279,
+ "[v110]": 50290,
+ "[v111]": 50370,
+ "[v112]": 50282,
+ "[v113]": 50299,
+ "[v114]": 50283,
+ "[v115]": 50300,
+ "[v116]": 50422,
+ "[v117]": 50305,
+ "[v118]": 50277,
+ "[v119]": 50383,
+ "[v11]": 50342,
+ "[v120]": 50306,
+ "[v121]": 50376,
+ "[v122]": 50405,
+ "[v123]": 50355,
+ "[v124]": 50328,
+ "[v125]": 50275,
+ "[v126]": 50395,
+ "[v127]": 50359,
+ "[v128]": 50415,
+ "[v129]": 50264,
+ "[v12]": 50360,
+ "[v130]": 50352,
+ "[v131]": 50322,
+ "[v132]": 50261,
+ "[v133]": 50393,
+ "[v134]": 50354,
+ "[v135]": 50418,
+ "[v136]": 50285,
+ "[v137]": 50281,
+ "[v138]": 50407,
+ "[v139]": 50325,
+ "[v13]": 50336,
+ "[v140]": 50396,
+ "[v141]": 50375,
+ "[v142]": 50372,
+ "[v143]": 50403,
+ "[v144]": 50378,
+ "[v145]": 50270,
+ "[v146]": 50339,
+ "[v147]": 50262,
+ "[v148]": 50399,
+ "[v149]": 50289,
+ "[v14]": 50385,
+ "[v150]": 50321,
+ "[v151]": 50404,
+ "[v152]": 50308,
+ "[v153]": 50368,
+ "[v154]": 50362,
+ "[v155]": 50412,
+ "[v156]": 50346,
+ "[v157]": 50260,
+ "[v158]": 50416,
+ "[v159]": 50294,
+ "[v15]": 50380,
+ "[v16]": 50334,
+ "[v17]": 50373,
+ "[v18]": 50365,
+ "[v19]": 50268,
+ "[v1]": 50408,
+ "[v20]": 50311,
+ "[v21]": 50401,
+ "[v22]": 50271,
+ "[v23]": 50361,
+ "[v24]": 50266,
+ "[v25]": 50291,
+ "[v26]": 50320,
+ "[v27]": 50329,
+ "[v28]": 50417,
+ "[v29]": 50356,
+ "[v2]": 50302,
+ "[v30]": 50347,
+ "[v31]": 50343,
+ "[v32]": 50421,
+ "[v33]": 50384,
+ "[v34]": 50280,
+ "[v35]": 50349,
+ "[v36]": 50330,
+ "[v37]": 50296,
+ "[v38]": 50357,
+ "[v39]": 50344,
+ "[v3]": 50284,
+ "[v40]": 50312,
+ "[v41]": 50298,
+ "[v42]": 50358,
+ "[v43]": 50276,
+ "[v44]": 50374,
+ "[v45]": 50263,
+ "[v46]": 50307,
+ "[v47]": 50313,
+ "[v48]": 50410,
+ "[v49]": 50324,
+ "[v4]": 50317,
+ "[v50]": 50272,
+ "[v51]": 50303,
+ "[v52]": 50392,
+ "[v53]": 50286,
+ "[v54]": 50341,
+ "[v55]": 50371,
+ "[v56]": 50367,
+ "[v57]": 50391,
+ "[v58]": 50379,
+ "[v59]": 50351,
+ "[v5]": 50304,
+ "[v60]": 50318,
+ "[v61]": 50394,
+ "[v62]": 50331,
+ "[v63]": 50337,
+ "[v64]": 50333,
+ "[v65]": 50278,
+ "[v66]": 50273,
+ "[v67]": 50326,
+ "[v68]": 50274,
+ "[v69]": 50386,
+ "[v6]": 50319,
+ "[v70]": 50335,
+ "[v71]": 50402,
+ "[v72]": 50292,
+ "[v73]": 50345,
+ "[v74]": 50293,
+ "[v75]": 50363,
+ "[v76]": 50397,
+ "[v77]": 50419,
+ "[v78]": 50382,
+ "[v79]": 50398,
+ "[v7]": 50267,
+ "[v80]": 50323,
+ "[v81]": 50288,
+ "[v82]": 50353,
+ "[v83]": 50364,
+ "[v84]": 50332,
+ "[v85]": 50387,
+ "[v86]": 50411,
+ "[v87]": 50413,
+ "[v88]": 50315,
+ "[v89]": 50366,
+ "[v8]": 50314,
+ "[v90]": 50406,
+ "[v91]": 50295,
+ "[v92]": 50340,
+ "[v93]": 50287,
+ "[v94]": 50388,
+ "[v95]": 50316,
+ "[v96]": 50414,
+ "[v97]": 50350,
+ "[v98]": 50369,
+ "[v99]": 50327,
+ "[v9]": 50420
+ }
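
These 166 additions ([CLS0]-[CLS2], [CLSN], [INIT], [NOARG], and [v0] through [v159]) extend distilgpt2's base vocabulary of 50,257 tokens to the 50,423 recorded in config.json below. A sketch of how such a file is typically produced with the transformers tokenizer API; the addition order that yielded these particular (non-sequential) id assignments is not recoverable from the file alone, so a rerun would assign different ids:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
model = AutoModelForCausalLM.from_pretrained("distilgpt2")

# The same 166 markers as above: 160 [vN] slots plus six control tokens.
new_tokens = ([f"[v{i}]" for i in range(160)]
              + ["[CLS0]", "[CLS1]", "[CLS2]", "[CLSN]", "[INIT]", "[NOARG]"])
tokenizer.add_special_tokens({"additional_special_tokens": new_tokens})

# Grow the embedding matrix to match: 50257 + 166 = 50423 rows,
# the vocab_size seen in config.json.
model.resize_token_embeddings(len(tokenizer))

# save_pretrained writes added_tokens.json next to the other tokenizer files.
tokenizer.save_pretrained("out")
```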
config.json ADDED
@@ -0,0 +1,47 @@
+ {
+ "_name_or_path": "distilgpt2",
+ "_num_labels": 1,
+ "activation_function": "gelu_new",
+ "architectures": [
+ "GPT2LMHeadModel"
+ ],
+ "attn_pdrop": 0.1,
+ "bos_token_id": 50256,
+ "embd_pdrop": 0.1,
+ "eos_token_id": 50256,
+ "id2label": {
+ "0": "LABEL_0"
+ },
+ "initializer_range": 0.02,
+ "label2id": {
+ "LABEL_0": 0
+ },
+ "layer_norm_epsilon": 1e-05,
+ "max_length": 912,
+ "model_type": "gpt2",
+ "n_ctx": 912,
+ "n_embd": 768,
+ "n_head": 12,
+ "n_inner": null,
+ "n_layer": 6,
+ "n_positions": 1024,
+ "reorder_and_upcast_attn": false,
+ "resid_pdrop": 0.1,
+ "scale_attn_by_inverse_layer_idx": false,
+ "scale_attn_weights": true,
+ "summary_activation": null,
+ "summary_first_dropout": 0.1,
+ "summary_proj_to_labels": true,
+ "summary_type": "cls_index",
+ "summary_use_proj": true,
+ "task_specific_params": {
+ "text-generation": {
+ "do_sample": true,
+ "max_length": 50
+ }
+ },
+ "torch_dtype": "float32",
+ "transformers_version": "4.24.0",
+ "use_cache": true,
+ "vocab_size": 50423
+ }
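
The architecture fields are stock distilgpt2 (6 layers, 12 heads, 768-dim embeddings); the notable edits are vocab_size grown to 50423 for the added tokens, and n_ctx/max_length trimmed to 912 while n_positions keeps GPT-2's 1024. A quick way to sanity-check the committed file from a local clone (the local path is an assumption; the diff does not name the repo id):

```python
from transformers import AutoConfig

config = AutoConfig.from_pretrained(".")  # directory holding this config.json
assert config.model_type == "gpt2" and config.n_layer == 6
assert config.vocab_size == 50423            # 50257 base + 166 added tokens
assert config.n_ctx == 912 and config.n_positions == 1024
```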
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5078e8045ccc0de0404391563da1d9c61ea38afa94f93efa6590328237b64099
+ size 334480121
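
pytorch_model.bin is tracked with Git LFS, so the repo stores only this three-line pointer; the ~334 MB float32 weight blob lives in LFS storage and is fetched by git lfs pull or the huggingface_hub client. Once downloaded, the blob can be checked against the pointer's oid (a SHA-256 of the file contents) and size, for example:

```python
import hashlib

# Verify a pulled LFS object against the pointer recorded in this commit.
with open("pytorch_model.bin", "rb") as f:
    blob = f.read()
assert len(blob) == 334480121
assert hashlib.sha256(blob).hexdigest() == (
    "5078e8045ccc0de0404391563da1d9c61ea38afa94f93efa6590328237b64099"
)
```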
special_tokens_map.json ADDED
@@ -0,0 +1,174 @@
+ {
+ "additional_special_tokens": [
+ "[CLSN]",
+ "[INIT]",
+ "[NOARG]",
+ "[v157]",
+ "[v132]",
+ "[v147]",
+ "[v45]",
+ "[v129]",
+ "[CLS1]",
+ "[v24]",
+ "[v7]",
+ "[v19]",
+ "[CLS0]",
+ "[v145]",
+ "[v22]",
+ "[v50]",
+ "[v66]",
+ "[v68]",
+ "[v125]",
+ "[v43]",
+ "[v118]",
+ "[v65]",
+ "[v10]",
+ "[v34]",
+ "[v137]",
+ "[v112]",
+ "[v114]",
+ "[v3]",
+ "[v136]",
+ "[v53]",
+ "[v93]",
+ "[v81]",
+ "[v149]",
+ "[v110]",
+ "[v25]",
+ "[v72]",
+ "[v74]",
+ "[v159]",
+ "[v91]",
+ "[v37]",
+ "[v106]",
+ "[v41]",
+ "[v113]",
+ "[v115]",
+ "[v104]",
+ "[v2]",
+ "[v51]",
+ "[v5]",
+ "[v117]",
+ "[v120]",
+ "[v46]",
+ "[v152]",
+ "[v103]",
+ "[v107]",
+ "[v20]",
+ "[v40]",
+ "[v47]",
+ "[v8]",
+ "[v88]",
+ "[v95]",
+ "[v4]",
+ "[v60]",
+ "[v6]",
+ "[v26]",
+ "[v150]",
+ "[v131]",
+ "[v80]",
+ "[v49]",
+ "[v139]",
+ "[v67]",
+ "[v99]",
+ "[v124]",
+ "[v27]",
+ "[v36]",
+ "[v62]",
+ "[v84]",
+ "[v64]",
+ "[v16]",
+ "[v70]",
+ "[v13]",
+ "[v63]",
+ "[v109]",
+ "[v146]",
+ "[v92]",
+ "[v54]",
+ "[v11]",
+ "[v31]",
+ "[v39]",
+ "[v73]",
+ "[v156]",
+ "[v30]",
+ "[v105]",
+ "[v35]",
+ "[v97]",
+ "[v59]",
+ "[v130]",
+ "[v82]",
+ "[v134]",
+ "[v123]",
+ "[v29]",
+ "[v38]",
+ "[v42]",
+ "[v127]",
+ "[v12]",
+ "[v23]",
+ "[v154]",
+ "[v75]",
+ "[v83]",
+ "[v18]",
+ "[v89]",
+ "[v56]",
+ "[v153]",
+ "[v98]",
+ "[v111]",
+ "[v55]",
+ "[v142]",
+ "[v17]",
+ "[v44]",
+ "[v141]",
+ "[v121]",
+ "[CLS2]",
+ "[v144]",
+ "[v58]",
+ "[v15]",
+ "[v100]",
+ "[v78]",
+ "[v119]",
+ "[v33]",
+ "[v14]",
+ "[v69]",
+ "[v85]",
+ "[v94]",
+ "[v101]",
+ "[v108]",
+ "[v57]",
+ "[v52]",
+ "[v133]",
+ "[v61]",
+ "[v126]",
+ "[v140]",
+ "[v76]",
+ "[v79]",
+ "[v148]",
+ "[v102]",
+ "[v21]",
+ "[v71]",
+ "[v143]",
+ "[v151]",
+ "[v122]",
+ "[v90]",
+ "[v138]",
+ "[v1]",
+ "[v0]",
+ "[v48]",
+ "[v86]",
+ "[v155]",
+ "[v87]",
+ "[v96]",
+ "[v128]",
+ "[v158]",
+ "[v28]",
+ "[v135]",
+ "[v77]",
+ "[v9]",
+ "[v32]",
+ "[v116]"
+ ],
+ "bos_token": "<|endoftext|>",
+ "eos_token": "<|endoftext|>",
+ "pad_token": "<|endoftext|>",
+ "unk_token": "<|endoftext|>"
+ }
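
Note that bos, eos, pad, and unk all map to GPT-2's single <|endoftext|> token: GPT-2 ships without a pad token, so reusing eos for padding is the standard workaround for batched training. The additional_special_tokens list mirrors added_tokens.json, so a loaded tokenizer resolves these markers to the ids fixed above; a small check, again assuming a local clone of the repo:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(".")  # directory with this commit's files
assert tok.pad_token == tok.eos_token == "<|endoftext|>"

# Special tokens resolve to the ids fixed in added_tokens.json
# and are never split by the BPE.
assert tok.convert_tokens_to_ids("[CLSN]") == 50257
assert tok.convert_tokens_to_ids("[INIT]") == 50258
assert tok.convert_tokens_to_ids("[v0]") == 50409
```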
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,10 @@
+ {
+ "add_prefix_space": false,
+ "bos_token": "<|endoftext|>",
+ "eos_token": "<|endoftext|>",
+ "model_max_length": 1024,
+ "name_or_path": "distilgpt2",
+ "special_tokens_map_file": null,
+ "tokenizer_class": "GPT2Tokenizer",
+ "unk_token": "<|endoftext|>"
+ }
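
model_max_length stays at GPT-2's default of 1024, matching n_positions in config.json, even though n_ctx/max_length there is 912; callers who want to honor the tighter 912-token budget have to truncate explicitly. A sketch:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(".")  # local clone assumed
assert tok.model_max_length == 1024       # tokenizer-side ceiling
# Truncate to the 912-token max_length declared in config.json:
enc = tok("some long prompt", truncation=True, max_length=912)
```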
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8df4cea213788e01c2df8a4e8f049e1ac020ea1ad66ce9260293cc141e9af6ee
+ size 3387
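
training_args.bin is the pickled TrainingArguments object that Trainer drops next to every checkpoint, also stored via LFS. It can be inspected to recover the run's actual hyperparameters; torch.load unpickles arbitrary code, so only do this for sources you trust (on recent PyTorch you may also need weights_only=False), and transformers must be importable for the unpickling to succeed:

```python
import torch

# Recover the run's hyperparameters from this commit's pickled arguments.
args = torch.load("training_args.bin")  # a transformers.TrainingArguments
print(args.output_dir, args.learning_rate, args.save_steps)
```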
vocab.json ADDED
The diff for this file is too large to render. See raw diff