perplex
- README.md +3 -3
- config.json +2 -2
- merges.txt +0 -0
- model.safetensors +2 -2
- runs/Mar21_16-17-46_09a0a2fe07b8/events.out.tfevents.1711037874.09a0a2fe07b8.895.0 +3 -0
- runs/Mar21_16-19-50_09a0a2fe07b8/events.out.tfevents.1711037995.09a0a2fe07b8.895.1 +3 -0
- runs/Mar21_16-21-31_09a0a2fe07b8/events.out.tfevents.1711038096.09a0a2fe07b8.895.2 +3 -0
- runs/Mar21_16-22-04_09a0a2fe07b8/events.out.tfevents.1711038134.09a0a2fe07b8.895.3 +3 -0
- runs/Mar21_16-22-54_09a0a2fe07b8/events.out.tfevents.1711038180.09a0a2fe07b8.895.4 +3 -0
- runs/Mar21_16-27-29_09a0a2fe07b8/events.out.tfevents.1711038456.09a0a2fe07b8.895.5 +3 -0
- tokenizer.json +0 -0
- training_args.bin +1 -1
- vocab.json +0 -0
README.md
CHANGED
@@ -31,15 +31,15 @@ More information needed
 
 The following hyperparameters were used during training:
 - learning_rate: 0.0005
-- train_batch_size:
+- train_batch_size: 4
 - eval_batch_size: 8
 - seed: 42
 - gradient_accumulation_steps: 8
-- total_train_batch_size:
+- total_train_batch_size: 32
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 1000
-- num_epochs:
+- num_epochs: 5
 - mixed_precision_training: Native AMP
 
 ### Training results
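The hyperparameter list above follows the auto-generated 🤗 Transformers model-card format. As a non-authoritative sketch (only the listed values come from the README; `output_dir` and all model/dataset wiring are placeholders), a `TrainingArguments` call that reproduces these settings could look like:

```python
from transformers import TrainingArguments

# Sketch only: values mirror the updated README; everything else is assumed.
training_args = TrainingArguments(
    output_dir="perplex",            # hypothetical output directory
    learning_rate=5e-4,              # learning_rate: 0.0005
    per_device_train_batch_size=4,   # train_batch_size: 4
    per_device_eval_batch_size=8,    # eval_batch_size: 8
    seed=42,                         # seed: 42
    gradient_accumulation_steps=8,   # 4 x 8 = total_train_batch_size of 32
    lr_scheduler_type="cosine",      # lr_scheduler_type: cosine
    warmup_steps=1000,               # lr_scheduler_warmup_steps: 1000
    num_train_epochs=5,              # num_epochs: 5
    fp16=True,                       # mixed_precision_training: Native AMP
)
```

The optimizer line in the README (Adam with betas=(0.9,0.999) and epsilon=1e-08) matches the Trainer's default optimizer settings, so it is not spelled out in the sketch.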
config.json
CHANGED
@@ -10,7 +10,7 @@
   "initializer_range": 0.02,
   "layer_norm_epsilon": 1e-05,
   "model_type": "gpt2",
-  "n_ctx":
+  "n_ctx": 4,
   "n_embd": 768,
   "n_head": 12,
   "n_inner": null,
@@ -28,5 +28,5 @@
   "torch_dtype": "float32",
   "transformers_version": "4.38.2",
   "use_cache": true,
-  "vocab_size":
+  "vocab_size": 10000
 }
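After this change the config describes a very small GPT-2 variant (10 000-token vocabulary, `n_ctx` field set to 4, 768-dim embeddings, 12 heads). A minimal, hedged sketch for loading it, assuming a hypothetical local checkout of this repository:

```python
from transformers import AutoConfig, AutoModelForCausalLM

# "./perplex" is a placeholder path to a local clone of this repo.
config = AutoConfig.from_pretrained("./perplex")
print(config.vocab_size)               # 10000 after this commit
print(getattr(config, "n_ctx", None))  # 4 after this commit
print(config.n_embd, config.n_head)    # 768, 12 (unchanged context lines)

# Build an untrained model with this architecture; to reuse the committed
# weights, from_pretrained("./perplex") would be used instead.
model = AutoModelForCausalLM.from_config(config)
print(model.num_parameters())
```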
merges.txt
CHANGED
The diff for this file is too large to render.
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:90cb4672d4203626dec85e5a6c5f4c2f62a7cc198d9aa12b4223928656bc1c1a
+size 374104704
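model.safetensors is tracked with Git LFS, so the file stored in Git is only a three-line pointer (spec version, sha256 oid, byte size); the ~374 MB payload lives in LFS storage. One hedged way to check a pulled copy against the pointer, using only the standard library (the path is a placeholder for a local clone after `git lfs pull`):

```python
import hashlib

# Placeholder path to the downloaded (LFS-smudged) weights file.
path = "model.safetensors"

h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

# Should match the oid recorded in the pointer:
# 90cb4672d4203626dec85e5a6c5f4c2f62a7cc198d9aa12b4223928656bc1c1a
print(h.hexdigest())
```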
runs/Mar21_16-17-46_09a0a2fe07b8/events.out.tfevents.1711037874.09a0a2fe07b8.895.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:491812a3283e1f9dbdfd9176f8ce06d971132849e1bb5ba99fc4d152d6d2b1d9
+size 9066
runs/Mar21_16-19-50_09a0a2fe07b8/events.out.tfevents.1711037995.09a0a2fe07b8.895.1
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d6e3d076af47b58e860263de6fbf54b634752ce5928d2ab8cb33cd6ca357c3c
+size 9066
runs/Mar21_16-21-31_09a0a2fe07b8/events.out.tfevents.1711038096.09a0a2fe07b8.895.2
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8a10530a47635a403dd6ad8ae11aed3f1cc230fc07db0f63dbe4b36af3f9715
+size 4576
runs/Mar21_16-22-04_09a0a2fe07b8/events.out.tfevents.1711038134.09a0a2fe07b8.895.3
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10ad60e0644329a2c5fe265aa3e669c9ce4deccc21bb21a6cd4ea6b77767681a
+size 4576
runs/Mar21_16-22-54_09a0a2fe07b8/events.out.tfevents.1711038180.09a0a2fe07b8.895.4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c63c59fb77bc490ae37eb297634871960c10895dffeec99aabacd6b97025fb1
+size 9064
runs/Mar21_16-27-29_09a0a2fe07b8/events.out.tfevents.1711038456.09a0a2fe07b8.895.5
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5cf2c9a5dd982ad18afa62a876fd343554b97831237c2895ca68cb31c2a8f681
+size 4928
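The six runs/ files are TensorBoard event logs written during the training attempts, also stored as LFS pointers. Assuming the `tensorboard` package is installed and the LFS objects have been pulled, one way to peek at the logged scalars:

```python
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# One of the run directories added in this commit.
logdir = "runs/Mar21_16-27-29_09a0a2fe07b8"

acc = EventAccumulator(logdir)
acc.Reload()

scalar_tags = acc.Tags()["scalars"]  # e.g. train/loss, if anything was logged
print(scalar_tags)
for tag in scalar_tags:
    for event in acc.Scalars(tag):
        print(tag, event.step, event.value)
```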
tokenizer.json
CHANGED
The diff for this file is too large to render.
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:923bc57551b1102c6e0be1b3877f4b1f5bcaee49a52a667822c08d7f43752ffa
 size 4856
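training_args.bin is the pickled `TrainingArguments` object that the Trainer saves alongside the model. A hedged way to inspect it (recent PyTorch versions need `weights_only=False` for pickled objects, and unpickling should only be done for files you trust):

```python
import torch

# Placeholder path inside a local clone of this repo (after `git lfs pull`).
args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.num_train_epochs, args.lr_scheduler_type)
```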
vocab.json
CHANGED
The diff for this file is too large to render.