pritoms committed on
Commit
5539fcd
1 Parent(s): 9e29ec1
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- license: apache-2.0
3
  tags:
4
  - generated_from_trainer
5
  datasets:
@@ -17,9 +17,9 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  # gpt-neo-125M-Byethon
19
 
20
- This model is a fine-tuned version of [EleutherAI/gpt-neo-125M](https://huggingface.co/EleutherAI/gpt-neo-125M) on the None dataset.
21
  It achieves the following results on the evaluation set:
22
- - Loss: 0.6258
23
 
24
  ## Model description
25
 
@@ -50,9 +50,9 @@ The following hyperparameters were used during training:
50
 
51
  | Training Loss | Epoch | Step | Validation Loss |
52
  |:-------------:|:-----:|:----:|:---------------:|
53
- | No log | 1.0 | 31 | 0.8309 |
54
- | No log | 2.0 | 62 | 0.6676 |
55
- | No log | 3.0 | 93 | 0.6258 |
56
 
57
 
58
  ### Framework versions
 
1
  ---
2
+ license: mit
3
  tags:
4
  - generated_from_trainer
5
  datasets:
 
17
 
18
  # gpt-neo-125M-Byethon
19
 
20
+ This model is a fine-tuned version of [Sentdex/GPyT](https://huggingface.co/Sentdex/GPyT) on the None dataset.
21
  It achieves the following results on the evaluation set:
22
+ - Loss: 1.6310
23
 
24
  ## Model description
25
 
 
50
 
51
  | Training Loss | Epoch | Step | Validation Loss |
52
  |:-------------:|:-----:|:----:|:---------------:|
53
+ | No log | 1.0 | 31 | 2.6150 |
54
+ | No log | 2.0 | 62 | 1.8266 |
55
+ | No log | 3.0 | 93 | 1.6310 |
56
 
57
 
58
  ### Framework versions
config.json CHANGED
@@ -1,46 +1,25 @@
1
  {
2
- "_name_or_path": "EleutherAI/gpt-neo-125M",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
- "GPTNeoForCausalLM"
6
  ],
7
- "attention_dropout": 0,
8
- "attention_layers": [
9
- "global",
10
- "local",
11
- "global",
12
- "local",
13
- "global",
14
- "local",
15
- "global",
16
- "local",
17
- "global",
18
- "local",
19
- "global",
20
- "local"
21
- ],
22
- "attention_types": [
23
- [
24
- [
25
- "global",
26
- "local"
27
- ],
28
- 6
29
- ]
30
- ],
31
- "bos_token_id": 50256,
32
- "embed_dropout": 0,
33
- "eos_token_id": 50256,
34
  "gradient_checkpointing": false,
35
- "hidden_size": 768,
36
  "initializer_range": 0.02,
37
- "intermediate_size": null,
38
  "layer_norm_epsilon": 1e-05,
39
- "max_position_embeddings": 2048,
40
- "model_type": "gpt_neo",
41
- "num_heads": 12,
42
- "num_layers": 12,
43
- "resid_dropout": 0,
 
 
 
 
44
  "summary_activation": null,
45
  "summary_first_dropout": 0.1,
46
  "summary_proj_to_labels": true,
@@ -49,6 +28,5 @@
49
  "torch_dtype": "float32",
50
  "transformers_version": "4.10.0",
51
  "use_cache": true,
52
- "vocab_size": 50257,
53
- "window_size": 256
54
  }
 
1
  {
2
+ "_name_or_path": "Sentdex/GPyT",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
+ "GPT2LMHeadModel"
6
  ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 0,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 2,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  "gradient_checkpointing": false,
 
12
  "initializer_range": 0.02,
 
13
  "layer_norm_epsilon": 1e-05,
14
+ "model_type": "gpt2",
15
+ "n_ctx": 1024,
16
+ "n_embd": 768,
17
+ "n_head": 12,
18
+ "n_inner": null,
19
+ "n_layer": 12,
20
+ "n_positions": 1024,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_weights": true,
23
  "summary_activation": null,
24
  "summary_first_dropout": 0.1,
25
  "summary_proj_to_labels": true,
 
28
  "torch_dtype": "float32",
29
  "transformers_version": "4.10.0",
30
  "use_cache": true,
31
+ "vocab_size": 52000
 
32
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a947f7279860c629884849659ee37d434ece54cfc6a62e041aaa3d8d90ce0ba0
3
- size 526017245
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db8ada5d04a75b73895b09ad0480668c25cf2b932efb03a58892523022d486cc
3
+ size 515758313
runs/Sep07_20-19-12_4519ca7e7339/1631045956.9950128/events.out.tfevents.1631045956.4519ca7e7339.87.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:834c0e3a5d71bdcddd9ff02433829d5e0a16b946d5a4024cd34309e016f5d188
3
+ size 4181
runs/Sep07_20-19-12_4519ca7e7339/events.out.tfevents.1631045956.4519ca7e7339.87.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f470f9d42b68219266fe8ff4b3749079052a30b5801d29b9e221a762d368869a
3
+ size 4174
runs/Sep07_20-19-12_4519ca7e7339/events.out.tfevents.1631046110.4519ca7e7339.87.5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:470099022faa808d3c57903d82443b88441991469f9493661ddfcd96ae7e0aab
3
+ size 306
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba53c8a4d78e42c88f4b3ab1fd0dc5f6e3b1647b30dbc2fccee5e497bc284012
3
  size 2671
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4c12a69a8c8ca3816ecfaed1c985a39a507271f90d0aaca8329049e442a6e7e
3
  size 2671