denizzhansahin committed on
Commit
4cec6ec
1 Parent(s): 077c855

Upload model

Files changed (4)
  1. README.md +203 -0
  2. config.json +38 -0
  3. generation_config.json +6 -0
  4. tf_model.h5 +3 -0
README.md ADDED
@@ -0,0 +1,203 @@
+ ---
+ license: mit
+ base_model: gpt2
+ tags:
+ - generated_from_keras_callback
+ model-index:
+ - name: deneme_linux
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information Keras had access to. You should
+ probably proofread and complete it, then remove this comment. -->
+
+ # deneme_linux
+
+ This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
+ It achieves the following results on the evaluation set:
+ - Train Loss: 2.7996
+ - Validation Loss: 7.3305
+ - Epoch: 149
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - optimizer: {'name': 'AdamWeightDecay', 'learning_rate': {'module': 'transformers.optimization_tf', 'class_name': 'WarmUp', 'config': {'initial_learning_rate': 5e-05, 'decay_schedule_fn': {'module': 'keras.optimizers.schedules', 'class_name': 'PolynomialDecay', 'config': {'initial_learning_rate': 5e-05, 'decay_steps': -995, 'end_learning_rate': 0.0, 'power': 1.0, 'cycle': False, 'name': None}, 'registered_name': None}, 'warmup_steps': 1000, 'power': 1.0, 'name': None}, 'registered_name': 'WarmUp'}, 'decay': 0.0, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-08, 'amsgrad': False, 'weight_decay_rate': 0.01}
+ - training_precision: float32
+
+ ### Training results
+
+ | Train Loss | Validation Loss | Epoch |
+ |:----------:|:---------------:|:-----:|
+ | 9.3341 | 9.3329 | 0 |
+ | 9.3273 | 9.3261 | 1 |
+ | 9.3176 | 9.3147 | 2 |
+ | 9.3034 | 9.2988 | 3 |
+ | 9.2891 | 9.2790 | 4 |
+ | 9.2696 | 9.2563 | 5 |
+ | 9.2427 | 9.2321 | 6 |
+ | 9.2079 | 9.2076 | 7 |
+ | 9.1797 | 9.1831 | 8 |
+ | 9.1538 | 9.1594 | 9 |
+ | 9.1238 | 9.1368 | 10 |
+ | 9.0976 | 9.1149 | 11 |
+ | 9.0693 | 9.0941 | 12 |
+ | 9.0416 | 9.0740 | 13 |
+ | 9.0129 | 9.0552 | 14 |
+ | 8.9912 | 9.0376 | 15 |
+ | 8.9680 | 9.0204 | 16 |
+ | 8.9454 | 9.0037 | 17 |
+ | 8.9195 | 8.9877 | 18 |
+ | 8.8975 | 8.9721 | 19 |
+ | 8.8795 | 8.9572 | 20 |
+ | 8.8502 | 8.9428 | 21 |
+ | 8.8225 | 8.9281 | 22 |
+ | 8.8015 | 8.9138 | 23 |
+ | 8.7767 | 8.9003 | 24 |
+ | 8.7509 | 8.8865 | 25 |
+ | 8.7220 | 8.8734 | 26 |
+ | 8.6941 | 8.8605 | 27 |
+ | 8.6681 | 8.8465 | 28 |
+ | 8.6301 | 8.8336 | 29 |
+ | 8.5992 | 8.8200 | 30 |
+ | 8.5714 | 8.8052 | 31 |
+ | 8.5383 | 8.7926 | 32 |
+ | 8.5024 | 8.7789 | 33 |
+ | 8.4610 | 8.7636 | 34 |
+ | 8.4281 | 8.7503 | 35 |
+ | 8.3899 | 8.7361 | 36 |
+ | 8.3533 | 8.7230 | 37 |
+ | 8.3132 | 8.7070 | 38 |
+ | 8.2752 | 8.6910 | 39 |
+ | 8.2345 | 8.6810 | 40 |
+ | 8.1960 | 8.6648 | 41 |
+ | 8.1543 | 8.6492 | 42 |
+ | 8.1172 | 8.6380 | 43 |
+ | 8.0813 | 8.6207 | 44 |
+ | 8.0300 | 8.6091 | 45 |
+ | 7.9933 | 8.5904 | 46 |
+ | 7.9482 | 8.5793 | 47 |
+ | 7.9128 | 8.5605 | 48 |
+ | 7.8651 | 8.5490 | 49 |
+ | 7.8304 | 8.5362 | 50 |
+ | 7.7855 | 8.5210 | 51 |
+ | 7.7519 | 8.5072 | 52 |
+ | 7.7060 | 8.4953 | 53 |
+ | 7.6608 | 8.4803 | 54 |
+ | 7.6056 | 8.4718 | 55 |
+ | 7.5630 | 8.4561 | 56 |
+ | 7.5407 | 8.4417 | 57 |
+ | 7.4962 | 8.4266 | 58 |
+ | 7.4505 | 8.4215 | 59 |
+ | 7.4109 | 8.3973 | 60 |
+ | 7.3746 | 8.3906 | 61 |
+ | 7.3244 | 8.3758 | 62 |
+ | 7.2809 | 8.3652 | 63 |
+ | 7.2430 | 8.3495 | 64 |
+ | 7.1911 | 8.3423 | 65 |
+ | 7.1611 | 8.3227 | 66 |
+ | 7.1075 | 8.3119 | 67 |
+ | 7.0734 | 8.3032 | 68 |
+ | 7.0258 | 8.2899 | 69 |
+ | 6.9824 | 8.2817 | 70 |
+ | 6.9412 | 8.2611 | 71 |
+ | 6.8944 | 8.2550 | 72 |
+ | 6.8464 | 8.2429 | 73 |
+ | 6.8119 | 8.2240 | 74 |
+ | 6.7580 | 8.2199 | 75 |
+ | 6.7163 | 8.2044 | 76 |
+ | 6.6795 | 8.1819 | 77 |
+ | 6.6326 | 8.1847 | 78 |
+ | 6.5853 | 8.1733 | 79 |
+ | 6.5533 | 8.1524 | 80 |
+ | 6.4894 | 8.1398 | 81 |
+ | 6.4450 | 8.1347 | 82 |
+ | 6.3933 | 8.1220 | 83 |
+ | 6.3410 | 8.1031 | 84 |
+ | 6.3249 | 8.0906 | 85 |
+ | 6.2508 | 8.0915 | 86 |
+ | 6.2044 | 8.0682 | 87 |
+ | 6.1633 | 8.0565 | 88 |
+ | 6.1228 | 8.0491 | 89 |
+ | 6.0807 | 8.0392 | 90 |
+ | 6.0308 | 8.0189 | 91 |
+ | 5.9657 | 8.0094 | 92 |
+ | 5.9309 | 7.9979 | 93 |
+ | 5.8735 | 7.9804 | 94 |
+ | 5.8191 | 7.9702 | 95 |
+ | 5.7671 | 7.9677 | 96 |
+ | 5.7181 | 7.9494 | 97 |
+ | 5.6724 | 7.9402 | 98 |
+ | 5.6309 | 7.9209 | 99 |
+ | 5.5713 | 7.9112 | 100 |
+ | 5.5281 | 7.8977 | 101 |
+ | 5.4531 | 7.8884 | 102 |
+ | 5.4251 | 7.8717 | 103 |
+ | 5.3797 | 7.8637 | 104 |
+ | 5.3067 | 7.8538 | 105 |
+ | 5.2699 | 7.8436 | 106 |
+ | 5.2156 | 7.8301 | 107 |
+ | 5.1551 | 7.8185 | 108 |
+ | 5.1223 | 7.8017 | 109 |
+ | 5.0656 | 7.7927 | 110 |
+ | 4.9996 | 7.7754 | 111 |
+ | 4.9432 | 7.7580 | 112 |
+ | 4.9028 | 7.7489 | 113 |
+ | 4.8242 | 7.7411 | 114 |
+ | 4.7516 | 7.7196 | 115 |
+ | 4.7323 | 7.7101 | 116 |
+ | 4.6725 | 7.7042 | 117 |
+ | 4.6302 | 7.6833 | 118 |
+ | 4.5391 | 7.6679 | 119 |
+ | 4.5007 | 7.6575 | 120 |
+ | 4.4435 | 7.6530 | 121 |
+ | 4.3905 | 7.6396 | 122 |
+ | 4.3257 | 7.6236 | 123 |
+ | 4.2915 | 7.6106 | 124 |
+ | 4.1985 | 7.5916 | 125 |
+ | 4.1590 | 7.5937 | 126 |
+ | 4.1070 | 7.5777 | 127 |
+ | 4.0532 | 7.5640 | 128 |
+ | 3.9899 | 7.5493 | 129 |
+ | 3.9289 | 7.5384 | 130 |
+ | 3.8696 | 7.5265 | 131 |
+ | 3.7945 | 7.5198 | 132 |
+ | 3.7454 | 7.5054 | 133 |
+ | 3.6815 | 7.4894 | 134 |
+ | 3.6453 | 7.4796 | 135 |
+ | 3.5649 | 7.4746 | 136 |
+ | 3.5214 | 7.4608 | 137 |
+ | 3.4517 | 7.4473 | 138 |
+ | 3.3937 | 7.4363 | 139 |
+ | 3.3266 | 7.4263 | 140 |
+ | 3.2744 | 7.4128 | 141 |
+ | 3.2199 | 7.3996 | 142 |
+ | 3.1601 | 7.3887 | 143 |
+ | 3.0998 | 7.3737 | 144 |
+ | 3.0584 | 7.3648 | 145 |
+ | 2.9785 | 7.3565 | 146 |
+ | 2.9186 | 7.3513 | 147 |
+ | 2.8455 | 7.3410 | 148 |
+ | 2.7996 | 7.3305 | 149 |
+
+
+ ### Framework versions
+
+ - Transformers 4.38.2
+ - TensorFlow 2.15.0
+ - Datasets 2.18.0
+ - Tokenizers 0.15.2
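
The serialized optimizer in the card above is transformers' TF `AdamWeightDecay` with a `WarmUp`-wrapped `PolynomialDecay` schedule. A minimal sketch (not part of this commit) of rebuilding it with transformers' `create_optimizer` helper; the `num_train_steps` value is an assumption back-solved from the logged `decay_steps` of -995 (decay_steps = num_train_steps - warmup_steps, so num_train_steps = 5, which suggests the schedule was configured for a far shorter run than the 150 epochs actually trained):

```python
# Hedged sketch: reconstructing the logged AdamWeightDecay optimizer
# with transformers' TF helper (transformers.optimization_tf).
from transformers import create_optimizer

optimizer, lr_schedule = create_optimizer(
    init_lr=5e-5,           # initial_learning_rate from the logged config
    num_train_steps=5,      # assumption: warmup_steps + decay_steps = 1000 + (-995)
    num_warmup_steps=1000,  # warmup_steps from the logged config
    weight_decay_rate=0.01, # weight_decay_rate from the logged config
)
```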
config.json ADDED
@@ -0,0 +1,38 @@
+ {
+   "_name_or_path": "gpt2",
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 3,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 4,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_ctx": 100,
+   "n_embd": 768,
+   "n_head": 12,
+   "n_inner": null,
+   "n_layer": 12,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "task_specific_params": {
+     "text-generation": {
+       "do_sample": true,
+       "max_length": 50
+     }
+   },
+   "transformers_version": "4.38.2",
+   "use_cache": true,
+   "vocab_size": 10000
+ }
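
This config departs from stock gpt2 in a few places: `vocab_size` is 10000 (vs. 50257), and `bos_token_id`/`eos_token_id` are 3/4 (vs. 50256 for both). A minimal loading sketch, assuming the repo id `denizzhansahin/deneme_linux` (inferred from the commit author and model name; not stated in the diff):

```python
# Hedged sketch: loading the uploaded TF checkpoint and checking the
# non-default config values noted above. The repo id is an assumption.
from transformers import TFGPT2LMHeadModel

model = TFGPT2LMHeadModel.from_pretrained("denizzhansahin/deneme_linux")

assert model.config.vocab_size == 10000  # custom 10k-token vocabulary
assert model.config.bos_token_id == 3    # non-default BOS id
assert model.config.eos_token_id == 4    # non-default EOS id
```

Because of the reduced vocabulary, the stock gpt2 tokenizer will not match this model; a matching tokenizer would need to ship with the repo.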
generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 3,
+   "eos_token_id": 4,
+   "transformers_version": "4.38.2"
+ }
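
These defaults mean `model.generate()` picks up `bos_token_id` 3 and `eos_token_id` 4 with no extra arguments. A sketch combining them with the `do_sample`/`max_length` defaults from `task_specific_params` in config.json (repo id again assumed):

```python
# Hedged sketch: sampling with the generation defaults above.
import tensorflow as tf
from transformers import TFGPT2LMHeadModel

model = TFGPT2LMHeadModel.from_pretrained("denizzhansahin/deneme_linux")  # assumed repo id
input_ids = tf.constant([[3]])  # seed with the configured bos_token_id
output_ids = model.generate(input_ids, do_sample=True, max_length=50)
print(output_ids.numpy())  # raw token ids; decoding needs the repo's own tokenizer
```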
tf_model.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1129de0f8dba453c3675f09259ca5766b0793f45d101bb57d1378a62fcd43f05
+ size 374265936
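
The weights file is committed as a Git LFS pointer; the actual ~374 MB HDF5 checkpoint resolves through LFS on download. A stdlib-only sketch for verifying a downloaded copy against the pointer's sha256 oid (local path assumed):

```python
# Hedged sketch: check a downloaded tf_model.h5 against the LFS pointer.
import hashlib

EXPECTED = "1129de0f8dba453c3675f09259ca5766b0793f45d101bb57d1378a62fcd43f05"

sha = hashlib.sha256()
with open("tf_model.h5", "rb") as f:  # assumed local path
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha.update(chunk)

assert sha.hexdigest() == EXPECTED, "sha256 mismatch with the LFS pointer"
```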