denizzhansahin committed
Commit 4cec6ec
Parent(s): 077c855

Upload model

Files changed:
- README.md +203 -0
- config.json +38 -0
- generation_config.json +6 -0
- tf_model.h5 +3 -0
README.md
ADDED
@@ -0,0 +1,203 @@
---
license: mit
base_model: gpt2
tags:
- generated_from_keras_callback
model-index:
- name: deneme_linux
  results: []
---

<!-- This model card has been generated automatically according to the information Keras had access to. You should
probably proofread and complete it, then remove this comment. -->

# deneme_linux

This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
It achieves the following results at the final training epoch:
- Train Loss: 2.7996
- Validation Loss: 7.3305
- Epoch: 149

## Model description

More information needed

## Intended uses & limitations

More information needed

## Training and evaluation data

More information needed

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training:
- optimizer: {'name': 'AdamWeightDecay', 'learning_rate': {'module': 'transformers.optimization_tf', 'class_name': 'WarmUp', 'config': {'initial_learning_rate': 5e-05, 'decay_schedule_fn': {'module': 'keras.optimizers.schedules', 'class_name': 'PolynomialDecay', 'config': {'initial_learning_rate': 5e-05, 'decay_steps': -995, 'end_learning_rate': 0.0, 'power': 1.0, 'cycle': False, 'name': None}, 'registered_name': None}, 'warmup_steps': 1000, 'power': 1.0, 'name': None}, 'registered_name': 'WarmUp'}, 'decay': 0.0, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-08, 'amsgrad': False, 'weight_decay_rate': 0.01}
- training_precision: float32

### Training results

| Train Loss | Validation Loss | Epoch |
|:----------:|:---------------:|:-----:|
| 9.3341 | 9.3329 | 0 |
| 9.3273 | 9.3261 | 1 |
| 9.3176 | 9.3147 | 2 |
| 9.3034 | 9.2988 | 3 |
| 9.2891 | 9.2790 | 4 |
| 9.2696 | 9.2563 | 5 |
| 9.2427 | 9.2321 | 6 |
| 9.2079 | 9.2076 | 7 |
| 9.1797 | 9.1831 | 8 |
| 9.1538 | 9.1594 | 9 |
| 9.1238 | 9.1368 | 10 |
| 9.0976 | 9.1149 | 11 |
| 9.0693 | 9.0941 | 12 |
| 9.0416 | 9.0740 | 13 |
| 9.0129 | 9.0552 | 14 |
| 8.9912 | 9.0376 | 15 |
| 8.9680 | 9.0204 | 16 |
| 8.9454 | 9.0037 | 17 |
| 8.9195 | 8.9877 | 18 |
| 8.8975 | 8.9721 | 19 |
| 8.8795 | 8.9572 | 20 |
| 8.8502 | 8.9428 | 21 |
| 8.8225 | 8.9281 | 22 |
| 8.8015 | 8.9138 | 23 |
| 8.7767 | 8.9003 | 24 |
| 8.7509 | 8.8865 | 25 |
| 8.7220 | 8.8734 | 26 |
| 8.6941 | 8.8605 | 27 |
| 8.6681 | 8.8465 | 28 |
| 8.6301 | 8.8336 | 29 |
| 8.5992 | 8.8200 | 30 |
| 8.5714 | 8.8052 | 31 |
| 8.5383 | 8.7926 | 32 |
| 8.5024 | 8.7789 | 33 |
| 8.4610 | 8.7636 | 34 |
| 8.4281 | 8.7503 | 35 |
| 8.3899 | 8.7361 | 36 |
| 8.3533 | 8.7230 | 37 |
| 8.3132 | 8.7070 | 38 |
| 8.2752 | 8.6910 | 39 |
| 8.2345 | 8.6810 | 40 |
| 8.1960 | 8.6648 | 41 |
| 8.1543 | 8.6492 | 42 |
| 8.1172 | 8.6380 | 43 |
| 8.0813 | 8.6207 | 44 |
| 8.0300 | 8.6091 | 45 |
| 7.9933 | 8.5904 | 46 |
| 7.9482 | 8.5793 | 47 |
| 7.9128 | 8.5605 | 48 |
| 7.8651 | 8.5490 | 49 |
| 7.8304 | 8.5362 | 50 |
| 7.7855 | 8.5210 | 51 |
| 7.7519 | 8.5072 | 52 |
| 7.7060 | 8.4953 | 53 |
| 7.6608 | 8.4803 | 54 |
| 7.6056 | 8.4718 | 55 |
| 7.5630 | 8.4561 | 56 |
| 7.5407 | 8.4417 | 57 |
| 7.4962 | 8.4266 | 58 |
| 7.4505 | 8.4215 | 59 |
| 7.4109 | 8.3973 | 60 |
| 7.3746 | 8.3906 | 61 |
| 7.3244 | 8.3758 | 62 |
| 7.2809 | 8.3652 | 63 |
| 7.2430 | 8.3495 | 64 |
| 7.1911 | 8.3423 | 65 |
| 7.1611 | 8.3227 | 66 |
| 7.1075 | 8.3119 | 67 |
| 7.0734 | 8.3032 | 68 |
| 7.0258 | 8.2899 | 69 |
| 6.9824 | 8.2817 | 70 |
| 6.9412 | 8.2611 | 71 |
| 6.8944 | 8.2550 | 72 |
| 6.8464 | 8.2429 | 73 |
| 6.8119 | 8.2240 | 74 |
| 6.7580 | 8.2199 | 75 |
| 6.7163 | 8.2044 | 76 |
| 6.6795 | 8.1819 | 77 |
| 6.6326 | 8.1847 | 78 |
| 6.5853 | 8.1733 | 79 |
| 6.5533 | 8.1524 | 80 |
| 6.4894 | 8.1398 | 81 |
| 6.4450 | 8.1347 | 82 |
| 6.3933 | 8.1220 | 83 |
| 6.3410 | 8.1031 | 84 |
| 6.3249 | 8.0906 | 85 |
| 6.2508 | 8.0915 | 86 |
| 6.2044 | 8.0682 | 87 |
| 6.1633 | 8.0565 | 88 |
| 6.1228 | 8.0491 | 89 |
| 6.0807 | 8.0392 | 90 |
| 6.0308 | 8.0189 | 91 |
| 5.9657 | 8.0094 | 92 |
| 5.9309 | 7.9979 | 93 |
| 5.8735 | 7.9804 | 94 |
| 5.8191 | 7.9702 | 95 |
| 5.7671 | 7.9677 | 96 |
| 5.7181 | 7.9494 | 97 |
| 5.6724 | 7.9402 | 98 |
| 5.6309 | 7.9209 | 99 |
| 5.5713 | 7.9112 | 100 |
| 5.5281 | 7.8977 | 101 |
| 5.4531 | 7.8884 | 102 |
| 5.4251 | 7.8717 | 103 |
| 5.3797 | 7.8637 | 104 |
| 5.3067 | 7.8538 | 105 |
| 5.2699 | 7.8436 | 106 |
| 5.2156 | 7.8301 | 107 |
| 5.1551 | 7.8185 | 108 |
| 5.1223 | 7.8017 | 109 |
| 5.0656 | 7.7927 | 110 |
| 4.9996 | 7.7754 | 111 |
| 4.9432 | 7.7580 | 112 |
| 4.9028 | 7.7489 | 113 |
| 4.8242 | 7.7411 | 114 |
| 4.7516 | 7.7196 | 115 |
| 4.7323 | 7.7101 | 116 |
| 4.6725 | 7.7042 | 117 |
| 4.6302 | 7.6833 | 118 |
| 4.5391 | 7.6679 | 119 |
| 4.5007 | 7.6575 | 120 |
| 4.4435 | 7.6530 | 121 |
| 4.3905 | 7.6396 | 122 |
| 4.3257 | 7.6236 | 123 |
| 4.2915 | 7.6106 | 124 |
| 4.1985 | 7.5916 | 125 |
| 4.1590 | 7.5937 | 126 |
| 4.1070 | 7.5777 | 127 |
| 4.0532 | 7.5640 | 128 |
| 3.9899 | 7.5493 | 129 |
| 3.9289 | 7.5384 | 130 |
| 3.8696 | 7.5265 | 131 |
| 3.7945 | 7.5198 | 132 |
| 3.7454 | 7.5054 | 133 |
| 3.6815 | 7.4894 | 134 |
| 3.6453 | 7.4796 | 135 |
| 3.5649 | 7.4746 | 136 |
| 3.5214 | 7.4608 | 137 |
| 3.4517 | 7.4473 | 138 |
| 3.3937 | 7.4363 | 139 |
| 3.3266 | 7.4263 | 140 |
| 3.2744 | 7.4128 | 141 |
| 3.2199 | 7.3996 | 142 |
| 3.1601 | 7.3887 | 143 |
| 3.0998 | 7.3737 | 144 |
| 3.0584 | 7.3648 | 145 |
| 2.9785 | 7.3565 | 146 |
| 2.9186 | 7.3513 | 147 |
| 2.8455 | 7.3410 | 148 |
| 2.7996 | 7.3305 | 149 |


### Framework versions

- Transformers 4.38.2
- TensorFlow 2.15.0
- Datasets 2.18.0
- Tokenizers 0.15.2
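The optimizer entry in the hyperparameters above is the serialized Keras config of transformers' `AdamWeightDecay` optimizer wrapped in a linear `WarmUp` schedule. Below is a minimal sketch, assuming the card's logged values, of how such an optimizer is typically built with `transformers.create_optimizer`. Note that `create_optimizer` sets `decay_steps = num_train_steps - num_warmup_steps`, so the logged `decay_steps: -995` implies the schedule was created with only 1000 + (-995) = 5 total training steps against 1000 warmup steps, meaning the post-warmup decay never engages.

```python
# Minimal sketch (not the author's training script): rebuilding the logged
# AdamWeightDecay + WarmUp/PolynomialDecay optimizer via transformers' TF helper.
from transformers import create_optimizer

optimizer, lr_schedule = create_optimizer(
    init_lr=5e-5,            # initial_learning_rate in the logged config
    num_train_steps=5,       # inferred from decay_steps = -995; an assumption
    num_warmup_steps=1000,   # warmup_steps in the logged config
    weight_decay_rate=0.01,  # weight_decay_rate in the logged config
)

# The optimizer is then passed to Keras as usual, e.g.:
# model.compile(optimizer=optimizer)
```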
config.json
ADDED
@@ -0,0 +1,38 @@
{
  "_name_or_path": "gpt2",
  "activation_function": "gelu_new",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 3,
  "embd_pdrop": 0.1,
  "eos_token_id": 4,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 100,
  "n_embd": 768,
  "n_head": 12,
  "n_inner": null,
  "n_layer": 12,
  "n_positions": 1024,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 50
    }
  },
  "transformers_version": "4.38.2",
  "use_cache": true,
  "vocab_size": 10000
}
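The non-default values here ("vocab_size": 10000 vs. gpt2's 50257, "bos_token_id": 3, "eos_token_id": 4, "n_ctx": 100) indicate the GPT-2 architecture was paired with a small custom tokenizer, so the token embeddings cannot be gpt2's pretrained ones despite the "fine-tuned version of gpt2" wording in the card. A hedged sketch of instantiating the same architecture from these values:

```python
# A sketch of recreating this architecture from the config above. Weights are
# freshly initialized: gpt2's pretrained embeddings assume a 50257-token
# vocabulary. Extra keys like n_ctx are stored as plain config attributes.
from transformers import GPT2Config, TFGPT2LMHeadModel

config = GPT2Config(
    vocab_size=10000,
    n_positions=1024,
    n_embd=768,
    n_layer=12,
    n_head=12,
    bos_token_id=3,
    eos_token_id=4,
    n_ctx=100,
)
model = TFGPT2LMHeadModel(config)
```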
generation_config.json
ADDED
@@ -0,0 +1,6 @@
{
  "_from_model_config": true,
  "bos_token_id": 3,
  "eos_token_id": 4,
  "transformers_version": "4.38.2"
}
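The generation config carries only the custom `bos_token_id`/`eos_token_id` defaults; the sampling settings live in `task_specific_params` in config.json. A hedged loading-and-generation sketch follows; the repo id is an assumption inferred from the committer and model name, and since this commit uploads no tokenizer files, a compatible 10k-vocabulary tokenizer must exist in the repo separately for this to run.

```python
# A hedged sketch, not verified against the Hub: loading the TF checkpoint and
# sampling from it. The repo id is hypothetical; AutoTokenizer will fail unless
# tokenizer files are added to the repo (none are uploaded in this commit).
from transformers import AutoTokenizer, TFAutoModelForCausalLM

repo_id = "denizzhansahin/deneme_linux"  # assumed from committer + model name

model = TFAutoModelForCausalLM.from_pretrained(repo_id)  # picks up tf_model.h5
tokenizer = AutoTokenizer.from_pretrained(repo_id)       # assumes tokenizer exists

inputs = tokenizer("example prompt", return_tensors="tf")
outputs = model.generate(**inputs, do_sample=True, max_length=50)  # mirrors task_specific_params
print(tokenizer.decode(outputs[0]))
```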
tf_model.h5
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1129de0f8dba453c3675f09259ca5766b0793f45d101bb57d1378a62fcd43f05
size 374265936