bigmorning committed on
Commit
1d58b15
1 Parent(s): 4040269
Files changed (3) hide show
  1. README.md +8 -27
  2. config.json +13 -8
  3. tf_model.h5 +2 -2
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- license: mit
3
  tags:
4
  - generated_from_keras_callback
5
  model-index:
@@ -12,11 +12,10 @@ probably proofread and complete it, then remove this comment. -->
12
 
13
  # try-m
14
 
15
- This model is a fine-tuned version of [dbmdz/german-gpt2](https://huggingface.co/dbmdz/german-gpt2) on an unknown dataset.
16
  It achieves the following results on the evaluation set:
17
- - Train Loss: 0.0004
18
- - Validation Loss: 0.0062
19
- - Epoch: 19
20
 
21
  ## Model description
22
 
@@ -40,28 +39,10 @@ The following hyperparameters were used during training:
40
 
41
  ### Training results
42
 
43
- | Train Loss | Validation Loss | Epoch |
44
- |:----------:|:---------------:|:-----:|
45
- | 0.1085 | 0.0849 | 0 |
46
- | 0.0418 | 0.0418 | 1 |
47
- | 0.0180 | 0.0220 | 2 |
48
- | 0.0110 | 0.0187 | 3 |
49
- | 0.0098 | 0.0130 | 4 |
50
- | 0.0056 | 0.0111 | 5 |
51
- | 0.0039 | 0.0102 | 6 |
52
- | 0.0034 | 0.0101 | 7 |
53
- | 0.0027 | 0.0092 | 8 |
54
- | 0.0024 | 0.0088 | 9 |
55
- | 0.0021 | 0.0085 | 10 |
56
- | 0.0019 | 0.0081 | 11 |
57
- | 0.0017 | 0.0081 | 12 |
58
- | 0.0014 | 0.0079 | 13 |
59
- | 0.0012 | 0.0076 | 14 |
60
- | 0.0010 | 0.0069 | 15 |
61
- | 0.0008 | 0.0073 | 16 |
62
- | 0.0008 | 0.0068 | 17 |
63
- | 0.0004 | 0.0060 | 18 |
64
- | 0.0004 | 0.0062 | 19 |
65
 
66
 
67
  ### Framework versions
 
1
  ---
2
+ license: apache-2.0
3
  tags:
4
  - generated_from_keras_callback
5
  model-index:
 
12
 
13
  # try-m
14
 
15
+ This model is a fine-tuned version of [distilgpt2](https://huggingface.co/distilgpt2) on an unknown dataset.
16
  It achieves the following results on the evaluation set:
17
+ - Train Loss: 0.2158
18
+ - Epoch: 1
 
19
 
20
  ## Model description
21
 
 
39
 
40
  ### Training results
41
 
42
+ | Train Loss | Epoch |
43
+ |:----------:|:-----:|
44
+ | 0.5434 | 0 |
45
+ | 0.2158 | 1 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
 
48
  ### Framework versions
config.json CHANGED
@@ -1,25 +1,31 @@
1
  {
2
- "_name_or_path": "dbmdz/german-gpt2",
 
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
6
  ],
7
- "attn_pdrop": 0.0,
8
  "bos_token_id": 50256,
9
- "embd_pdrop": 0.0,
10
- "eos_token_id": 3,
11
- "gradient_checkpointing": false,
 
 
12
  "initializer_range": 0.02,
 
 
 
13
  "layer_norm_epsilon": 1e-05,
14
  "model_type": "gpt2",
15
  "n_ctx": 1024,
16
  "n_embd": 768,
17
  "n_head": 12,
18
  "n_inner": null,
19
- "n_layer": 12,
20
  "n_positions": 1024,
21
  "reorder_and_upcast_attn": false,
22
- "resid_pdrop": 0.0,
23
  "scale_attn_by_inverse_layer_idx": false,
24
  "scale_attn_weights": true,
25
  "summary_activation": null,
@@ -33,7 +39,6 @@
33
  "max_length": 50
34
  }
35
  },
36
- "torch_dtype": "float32",
37
  "transformers_version": "4.17.0",
38
  "use_cache": false,
39
  "vocab_size": 5998
 
1
  {
2
+ "_name_or_path": "distilgpt2",
3
+ "_num_labels": 1,
4
  "activation_function": "gelu_new",
5
  "architectures": [
6
  "GPT2LMHeadModel"
7
  ],
8
+ "attn_pdrop": 0.1,
9
  "bos_token_id": 50256,
10
+ "embd_pdrop": 0.1,
11
+ "eos_token_id": 50256,
12
+ "id2label": {
13
+ "0": "LABEL_0"
14
+ },
15
  "initializer_range": 0.02,
16
+ "label2id": {
17
+ "LABEL_0": 0
18
+ },
19
  "layer_norm_epsilon": 1e-05,
20
  "model_type": "gpt2",
21
  "n_ctx": 1024,
22
  "n_embd": 768,
23
  "n_head": 12,
24
  "n_inner": null,
25
+ "n_layer": 6,
26
  "n_positions": 1024,
27
  "reorder_and_upcast_attn": false,
28
+ "resid_pdrop": 0.1,
29
  "scale_attn_by_inverse_layer_idx": false,
30
  "scale_attn_weights": true,
31
  "summary_activation": null,
 
39
  "max_length": 50
40
  }
41
  },
 
42
  "transformers_version": "4.17.0",
43
  "use_cache": false,
44
  "vocab_size": 5998
tf_model.h5 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d502ff58d91a0258249ab35f287ad323eba3943d1e255cf37d6ab80452029b92
3
- size 380401328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:884d2b7ede8ea69141e3f9486f0cc1d2ab1ba9eff4650d9a2a585a267c338d31
3
+ size 210211336