bigmorning
/

try-m

@@ -1,5 +1,5 @@
 ---
-license: mit
 tags:
 - generated_from_keras_callback
 model-index:
@@ -12,11 +12,10 @@ probably proofread and complete it, then remove this comment. -->
 # try-m
-This model is a fine-tuned version of [dbmdz/german-gpt2](https://huggingface.co/dbmdz/german-gpt2) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Train Loss: 0.0004
-- Validation Loss: 0.0062
-- Epoch: 19
 ## Model description
@@ -40,28 +39,10 @@ The following hyperparameters were used during training:
 ### Training results
-| Train Loss | Validation Loss | Epoch |
-|:----------:|:---------------:|:-----:|
-| 0.1085     | 0.0849          | 0     |
-| 0.0418     | 0.0418          | 1     |
-| 0.0180     | 0.0220          | 2     |
-| 0.0110     | 0.0187          | 3     |
-| 0.0098     | 0.0130          | 4     |
-| 0.0056     | 0.0111          | 5     |
-| 0.0039     | 0.0102          | 6     |
-| 0.0034     | 0.0101          | 7     |
-| 0.0027     | 0.0092          | 8     |
-| 0.0024     | 0.0088          | 9     |
-| 0.0021     | 0.0085          | 10    |
-| 0.0019     | 0.0081          | 11    |
-| 0.0017     | 0.0081          | 12    |
-| 0.0014     | 0.0079          | 13    |
-| 0.0012     | 0.0076          | 14    |
-| 0.0010     | 0.0069          | 15    |
-| 0.0008     | 0.0073          | 16    |
-| 0.0008     | 0.0068          | 17    |
-| 0.0004     | 0.0060          | 18    |
-| 0.0004     | 0.0062          | 19    |
 ### Framework versions

 ---
+license: apache-2.0
 tags:
 - generated_from_keras_callback
 model-index:
 # try-m
+This model is a fine-tuned version of [distilgpt2](https://huggingface.co/distilgpt2) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Train Loss: 0.2158
+- Epoch: 1
 ## Model description
 ### Training results
+| Train Loss | Epoch |
+|:----------:|:-----:|
+| 0.5434     | 0     |
+| 0.2158     | 1     |
 ### Framework versions

config.json CHANGED Viewed

@@ -1,25 +1,31 @@
 {
-  "_name_or_path": "dbmdz/german-gpt2",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"
   ],
-  "attn_pdrop": 0.0,
   "bos_token_id": 50256,
-  "embd_pdrop": 0.0,
-  "eos_token_id": 3,
-  "gradient_checkpointing": false,
   "initializer_range": 0.02,
   "layer_norm_epsilon": 1e-05,
   "model_type": "gpt2",
   "n_ctx": 1024,
   "n_embd": 768,
   "n_head": 12,
   "n_inner": null,
-  "n_layer": 12,
   "n_positions": 1024,
   "reorder_and_upcast_attn": false,
-  "resid_pdrop": 0.0,
   "scale_attn_by_inverse_layer_idx": false,
   "scale_attn_weights": true,
   "summary_activation": null,
@@ -33,7 +39,6 @@
       "max_length": 50
     }
   },
-  "torch_dtype": "float32",
   "transformers_version": "4.17.0",
   "use_cache": false,
   "vocab_size": 5998

 {
+  "_name_or_path": "distilgpt2",
+  "_num_labels": 1,
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"
   ],
+  "attn_pdrop": 0.1,
   "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "id2label": {
+    "0": "LABEL_0"
+  },
   "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0
+  },
   "layer_norm_epsilon": 1e-05,
   "model_type": "gpt2",
   "n_ctx": 1024,
   "n_embd": 768,
   "n_head": 12,
   "n_inner": null,
+  "n_layer": 6,
   "n_positions": 1024,
   "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
   "scale_attn_by_inverse_layer_idx": false,
   "scale_attn_weights": true,
   "summary_activation": null,
       "max_length": 50
     }
   },
   "transformers_version": "4.17.0",
   "use_cache": false,
   "vocab_size": 5998

tf_model.h5 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d502ff58d91a0258249ab35f287ad323eba3943d1e255cf37d6ab80452029b92
-size 380401328

 version https://git-lfs.github.com/spec/v1
+oid sha256:884d2b7ede8ea69141e3f9486f0cc1d2ab1ba9eff4650d9a2a585a267c338d31
+size 210211336