RAYTRAC3R committed
Commit 757c12d
Parent: 16d3bf4

new model version

Files changed (11)
  1. checkpoint +0 -3
  2. config.json +14 -2
  3. counter +0 -1
  4. encoder.json +0 -0
  5. merges.txt +1 -1
  6. pytorch_model.bin +2 -2
  7. tokenizer.json +0 -0
  8. tokenizer_config.json +10 -0
  9. trainer_state.json +22 -0
  10. vocab.bpe +0 -0
  11. vocab.json +0 -0
checkpoint DELETED
@@ -1,3 +0,0 @@
- model_checkpoint_path: "model-1809"
- all_model_checkpoint_paths: "model-1000"
- all_model_checkpoint_paths: "model-1809"
config.json CHANGED
@@ -1,5 +1,9 @@
  {
+ "_name_or_path": "gpt2-medium",
  "activation_function": "gelu_new",
+ "architectures": [
+   "GPT2LMHeadModel"
+ ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
@@ -13,7 +17,8 @@
  "n_inner": null,
  "n_layer": 24,
  "n_positions": 1024,
- "n_vocab": 50257,
+ "n_special": 0,
+ "predict_special_tokens": true,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
@@ -23,7 +28,14 @@
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
- "transformers_version": "4.22.1",
+ "task_specific_params": {
+   "text-generation": {
+     "do_sample": true,
+     "max_length": 50
+   }
+ },
+ "torch_dtype": "float32",
+ "transformers_version": "4.23.0.dev0",
  "use_cache": true,
  "vocab_size": 50257
  }
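
The updated config now records the base checkpoint (`_name_or_path: gpt2-medium`), the `GPT2LMHeadModel` architecture, and default sampling settings under `task_specific_params["text-generation"]`, which makes the repo loadable with standard `transformers` APIs. A minimal sketch, assuming the repo has been cloned to a local `./model` directory (a placeholder path, not part of this commit):

```python
# Sketch: load the model with the updated config.json and sample text using
# the defaults from task_specific_params["text-generation"] (do_sample, max_length=50).
from transformers import GPT2LMHeadModel, GPT2Tokenizer

model = GPT2LMHeadModel.from_pretrained("./model")    # reads config.json above
tokenizer = GPT2Tokenizer.from_pretrained("./model")  # reads tokenizer_config.json below

inputs = tokenizer("Hello, world", return_tensors="pt")
outputs = model.generate(**inputs, do_sample=True, max_length=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```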
counter DELETED
@@ -1 +0,0 @@
- 1809
encoder.json DELETED
The diff for this file is too large to render. See raw diff
 
merges.txt CHANGED
@@ -1,4 +1,4 @@
- #version: 0.2
+ #version: 0.2 - Trained by `huggingface/tokenizers`
  Ġ t
  Ġ a
  h e
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:82992202046d96c34a3668abc556f88103bb4d5ccffc6536ca617fc2210c8c9a
- size 1444569625
+ oid sha256:8b771e607deb7759c82cdf3518edc6159296409048d879a8577c28311631bcd9
+ size 1444566873
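
The weights file is stored via Git LFS, so this diff only changes the pointer: a SHA-256 object id and a byte size. Those two values are enough to verify a downloaded copy. A minimal sketch, with the local path as a placeholder:

```python
# Sketch: verify a downloaded pytorch_model.bin against the new LFS pointer above.
import hashlib, os

path = "pytorch_model.bin"  # placeholder: wherever the file was downloaded
expected_oid = "8b771e607deb7759c82cdf3518edc6159296409048d879a8577c28311631bcd9"
expected_size = 1444566873

h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)

assert os.path.getsize(path) == expected_size
assert h.hexdigest() == expected_oid
```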
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "add_prefix_space": false,
+   "bos_token": "<|endoftext|>",
+   "eos_token": "<|endoftext|>",
+   "model_max_length": 1024,
+   "name_or_path": "gpt2-medium",
+   "special_tokens_map_file": null,
+   "tokenizer_class": "GPT2Tokenizer",
+   "unk_token": "<|endoftext|>"
+ }
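
As is standard for GPT-2, the new tokenizer config maps bos, eos, and unk all to the single `<|endoftext|>` token (id 50256, matching `bos_token_id`/`eos_token_id` in config.json) and caps inputs at `model_max_length` 1024. A short sketch, reusing the placeholder `./model` path from above:

```python
# Sketch: the three special-token roles resolve to one token, and inputs
# truncate at the 1024-position context window recorded above.
from transformers import GPT2Tokenizer

tok = GPT2Tokenizer.from_pretrained("./model")
print(tok.bos_token, tok.eos_token, tok.unk_token)  # <|endoftext|> x3
print(tok.eos_token_id)                             # 50256
print(tok.model_max_length)                         # 1024
ids = tok("some very long text ...", truncation=True).input_ids  # <= 1024 tokens
```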
trainer_state.json ADDED
@@ -0,0 +1,22 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 0.05319714863283328,
+   "global_step": 500,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.05,
+       "learning_rate": 4.9473348228534955e-05,
+       "loss": 2.8884,
+       "step": 500
+     }
+   ],
+   "max_steps": 46995,
+   "num_train_epochs": 5,
+   "total_flos": 928700694528000.0,
+   "trial_name": null,
+   "trial_params": null
+ }
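
The trainer state shows this version was saved very early in the run: the fractional `epoch` is just `global_step` divided by the steps in one epoch, which the other fields let us check directly:

```python
# Sanity check on the numbers above: 46995 max steps over 5 epochs
# gives 9399 optimizer steps per epoch, so step 500 lands at epoch ~0.053.
steps_per_epoch = 46995 / 5           # 9399.0
epoch = 500 / steps_per_epoch         # 0.05319714863283328
print(epoch)                          # matches "epoch" in trainer_state.json
```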
vocab.bpe DELETED
The diff for this file is too large to render. See raw diff
 
vocab.json CHANGED
The diff for this file is too large to render. See raw diff