gerou161 committed
Commit 264eb37
Parent: b61e29b

Add files using upload-large-folder tool

This view is limited to 50 files because it contains too many changes. See the raw diff for the full set.
Files changed (50)
  1. attention_2_only_emb/model_1000/optimizer.pt +3 -0
  2. attention_2_only_emb/model_11000/optimizer.pt +3 -0
  3. attention_2_only_emb/model_13000/optimizer.pt +3 -0
  4. attention_2_only_emb/model_14000/optimizer.pt +3 -0
  5. attention_2_only_emb/model_2000/optimizer.pt +3 -0
  6. attention_2_only_emb/model_3000/optimizer.pt +3 -0
  7. attention_2_only_emb/model_4000/optimizer.pt +3 -0
  8. attention_2_only_emb/model_5000/optimizer.pt +3 -0
  9. attention_2_only_emb/model_7000/pytorch_model.bin +3 -0
  10. attention_2_only_emb/model_9000/optimizer.pt +3 -0
  11. bigram_1/model_13000/optimizer.pt +3 -0
  12. bigram_1/model_2000/optimizer.pt +3 -0
  13. first_attention_2/model_1000/config.json +19 -0
  14. first_attention_2/model_1000/training_state.json +8 -0
  15. first_attention_2/model_10000/config.json +19 -0
  16. first_attention_2/model_10000/training_state.json +8 -0
  17. first_attention_2/model_11000/config.json +19 -0
  18. first_attention_2/model_11000/training_state.json +8 -0
  19. first_attention_2/model_12000/config.json +19 -0
  20. first_attention_2/model_12000/training_state.json +8 -0
  21. first_attention_2/model_14000/config.json +19 -0
  22. first_attention_2/model_14000/training_state.json +8 -0
  23. first_attention_2/model_15000/config.json +19 -0
  24. first_attention_2/model_15000/training_state.json +8 -0
  25. first_attention_2/model_5000/config.json +19 -0
  26. first_attention_2/model_5000/training_state.json +8 -0
  27. first_attention_2/model_6000/config.json +19 -0
  28. first_attention_2/model_6000/training_state.json +8 -0
  29. first_attention_2/model_7000/config.json +19 -0
  30. first_attention_2/model_7000/training_state.json +8 -0
  31. first_attention_2/model_8000/config.json +19 -0
  32. first_attention_2/model_8000/training_state.json +8 -0
  33. first_attention_2/model_9000/config.json +19 -0
  34. first_attention_2/model_9000/training_state.json +8 -0
  35. first_attention_2_attention_unfreeze/model_1000/optimizer.pt +3 -0
  36. first_attention_2_attention_unfreeze/model_10000/optimizer.pt +3 -0
  37. first_attention_2_attention_unfreeze/model_12000/optimizer.pt +3 -0
  38. first_attention_2_attention_unfreeze/model_13000/optimizer.pt +3 -0
  39. first_attention_2_attention_unfreeze/model_15000/optimizer.pt +3 -0
  40. first_attention_2_attention_unfreeze/model_2000/optimizer.pt +3 -0
  41. first_attention_2_attention_unfreeze/model_3000/optimizer.pt +3 -0
  42. first_attention_2_attention_unfreeze/model_4000/optimizer.pt +3 -0
  43. first_attention_2_attention_unfreeze/model_5000/optimizer.pt +3 -0
  44. first_attention_2_attention_unfreeze/model_6000/optimizer.pt +3 -0
  45. first_attention_2_attention_unfreeze/model_8000/optimizer.pt +3 -0
  46. first_attention_resume_loss_convergence/model_3000/optimizer.pt +3 -0
  47. first_layer_1/model_1000/optimizer.pt +3 -0
  48. first_layer_1/model_1000/pytorch_model.bin +3 -0
  49. first_layer_1/model_10000/optimizer.pt +3 -0
  50. first_layer_1/model_10000/pytorch_model.bin +3 -0
attention_2_only_emb/model_1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:90e36709a2be1f44091ca2b79e3da0d807a8e1b63e9dd484ca40aa563d6806f9
+ size 3831971578
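
Each optimizer.pt and pytorch_model.bin entry in this commit is a Git LFS pointer file, not the tensor data itself: the three added lines give the pointer-spec version, the SHA-256 of the actual blob, and its size in bytes. A minimal Python sketch (standard library only; the paths are illustrative, not part of this commit) for parsing a pointer and verifying a fetched blob against it:

import hashlib

def parse_lfs_pointer(path):
    # Read the three "key value" lines of a Git LFS pointer file.
    fields = dict(line.split(" ", 1) for line in open(path).read().splitlines())
    return {
        "version": fields["version"],
        "oid": fields["oid"].removeprefix("sha256:"),
        "size": int(fields["size"]),
    }

def blob_matches_pointer(blob_path, pointer):
    # Hash the blob in 1 MiB chunks and compare digest and byte count.
    h, n = hashlib.sha256(), 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
            n += len(chunk)
    return h.hexdigest() == pointer["oid"] and n == pointer["size"]
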
attention_2_only_emb/model_11000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5e1034cac02becda9747198b1d50eef735ee2f09feda0072544b26e891950744
+ size 3831971578
attention_2_only_emb/model_13000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e8bd5fcd410c58a01c810503b05f4b10d79dae9c30ce05db36d1a1ab9acf891a
+ size 3831971578
attention_2_only_emb/model_14000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:56aee6ce75242ef897404ce0d71199504b76bd1d94d5e1944eaf60383bbd1c9a
+ size 3831971578
attention_2_only_emb/model_2000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fff35e5764903efe1bc0e3c6183bcede724b86a46808ee76b9ca1a603638b9e0
+ size 3831971578
attention_2_only_emb/model_3000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f62e15803c9dec9b94261f7dd38148f24cca9d0fb2904f5d98fba4499d99f47c
+ size 3831971578
attention_2_only_emb/model_4000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dfb492a7c81974c21467d61d402f328d07fc4fbec60714e63a347b68edc23835
+ size 3831971578
attention_2_only_emb/model_5000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6f487d5050fc7515d3b5ae387945268e79be8bcb60fe97ac101217fbf0d7ec2b
+ size 3831971578
attention_2_only_emb/model_7000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:78fc551240970d2e19e77c2c6d786e1133a98032538efbc249a7900c5772506b
+ size 2533545094
attention_2_only_emb/model_9000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:999c518c597d729ba6de7041a915ef77e80d142438acff911ab850422a16d60a
+ size 3831971578
bigram_1/model_13000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d22bb0bb9a1d249471583ae4ee828f82e867df6e0ddba520594ef2019efa7a56
+ size 1235135766
bigram_1/model_2000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:af1e99bcab5a86059566dc445b7ba5350f5a19fcd7b6edb0df72a96bd3ffbcd6
+ size 1235135766
first_attention_2/model_1000/config.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "architectures": [
+     "LLaMAForCausalLM"
+   ],
+   "bos_token_id": 50256,
+   "eos_token_id": 50256,
+   "hidden_act": "silu",
+   "hidden_size": 1536,
+   "intermediate_size": 5376,
+   "initializer_range": 0.02,
+   "max_sequence_length": 1024,
+   "model_type": "llama",
+   "num_attention_heads": 24,
+   "num_hidden_layers": 14,
+   "rms_norm_eps": 1e-05,
+   "transformers_version": "4.28.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
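
The config.json stored with every first_attention_2 checkpoint describes the same model: a 14-layer LLaMA-style decoder with hidden size 1536, 24 attention heads, and a GPT-2-sized vocabulary (50257, with token 50256 as both BOS and EOS). A minimal loading sketch, assuming a transformers release with LLaMA support (>= 4.28) and that the checkpoint directory also holds a pytorch_model.bin pulled from LFS:

from transformers import AutoConfig, AutoModelForCausalLM

ckpt_dir = "first_attention_2/model_1000"  # illustrative checkpoint directory

config = AutoConfig.from_pretrained(ckpt_dir)
print(config.hidden_size, config.num_hidden_layers, config.num_attention_heads)  # 1536 14 24

# Requires the weight file next to config.json (fetch with `git lfs pull`).
model = AutoModelForCausalLM.from_pretrained(ckpt_dir)
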
first_attention_2/model_1000/training_state.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "global_step": 1000,
+   "update_step": 1000,
+   "tokens_seen": 639025152,
+   "tokens_seen_before": 638386176,
+   "update_time": 2.8817572593688965,
+   "wandb_id": "hkuum9kt"
+ }
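
The training_state.json fields pin down the batch geometry: tokens_seen minus tokens_seen_before is 638,976 tokens per update which, at the 1,024-token max_sequence_length above, is 624 sequences per step; dividing by update_time gives roughly 2.2e5 tokens per second. A small sketch of that arithmetic (the path is illustrative):

import json

with open("first_attention_2/model_1000/training_state.json") as f:
    state = json.load(f)

tokens_per_step = state["tokens_seen"] - state["tokens_seen_before"]   # 638976
seqs_per_step = tokens_per_step // 1024                                # 624 sequences/step
tokens_per_sec = tokens_per_step / state["update_time"]                # ~221,700 tokens/s
print(tokens_per_step, seqs_per_step, round(tokens_per_sec))
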
first_attention_2/model_10000/config.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "architectures": [
+     "LLaMAForCausalLM"
+   ],
+   "bos_token_id": 50256,
+   "eos_token_id": 50256,
+   "hidden_act": "silu",
+   "hidden_size": 1536,
+   "intermediate_size": 5376,
+   "initializer_range": 0.02,
+   "max_sequence_length": 1024,
+   "model_type": "llama",
+   "num_attention_heads": 24,
+   "num_hidden_layers": 14,
+   "rms_norm_eps": 1e-05,
+   "transformers_version": "4.28.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
first_attention_2/model_10000/training_state.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "global_step": 10000,
+   "update_step": 10000,
+   "tokens_seen": 6389809152,
+   "tokens_seen_before": 6389170176,
+   "update_time": 2.883666515350342,
+   "wandb_id": "hkuum9kt"
+ }
first_attention_2/model_11000/config.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "architectures": [
+     "LLaMAForCausalLM"
+   ],
+   "bos_token_id": 50256,
+   "eos_token_id": 50256,
+   "hidden_act": "silu",
+   "hidden_size": 1536,
+   "intermediate_size": 5376,
+   "initializer_range": 0.02,
+   "max_sequence_length": 1024,
+   "model_type": "llama",
+   "num_attention_heads": 24,
+   "num_hidden_layers": 14,
+   "rms_norm_eps": 1e-05,
+   "transformers_version": "4.28.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
first_attention_2/model_11000/training_state.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "global_step": 11000,
+   "update_step": 11000,
+   "tokens_seen": 7028785152,
+   "tokens_seen_before": 7028146176,
+   "update_time": 2.884199380874634,
+   "wandb_id": "hkuum9kt"
+ }
first_attention_2/model_12000/config.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "architectures": [
+     "LLaMAForCausalLM"
+   ],
+   "bos_token_id": 50256,
+   "eos_token_id": 50256,
+   "hidden_act": "silu",
+   "hidden_size": 1536,
+   "intermediate_size": 5376,
+   "initializer_range": 0.02,
+   "max_sequence_length": 1024,
+   "model_type": "llama",
+   "num_attention_heads": 24,
+   "num_hidden_layers": 14,
+   "rms_norm_eps": 1e-05,
+   "transformers_version": "4.28.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
first_attention_2/model_12000/training_state.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "global_step": 12000,
+   "update_step": 12000,
+   "tokens_seen": 7667761152,
+   "tokens_seen_before": 7667122176,
+   "update_time": 2.8828084468841553,
+   "wandb_id": "hkuum9kt"
+ }
first_attention_2/model_14000/config.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "architectures": [
+     "LLaMAForCausalLM"
+   ],
+   "bos_token_id": 50256,
+   "eos_token_id": 50256,
+   "hidden_act": "silu",
+   "hidden_size": 1536,
+   "intermediate_size": 5376,
+   "initializer_range": 0.02,
+   "max_sequence_length": 1024,
+   "model_type": "llama",
+   "num_attention_heads": 24,
+   "num_hidden_layers": 14,
+   "rms_norm_eps": 1e-05,
+   "transformers_version": "4.28.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
first_attention_2/model_14000/training_state.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "global_step": 14000,
+   "update_step": 14000,
+   "tokens_seen": 8945713152,
+   "tokens_seen_before": 8945074176,
+   "update_time": 2.8815410137176514,
+   "wandb_id": "hkuum9kt"
+ }
first_attention_2/model_15000/config.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "architectures": [
+     "LLaMAForCausalLM"
+   ],
+   "bos_token_id": 50256,
+   "eos_token_id": 50256,
+   "hidden_act": "silu",
+   "hidden_size": 1536,
+   "intermediate_size": 5376,
+   "initializer_range": 0.02,
+   "max_sequence_length": 1024,
+   "model_type": "llama",
+   "num_attention_heads": 24,
+   "num_hidden_layers": 14,
+   "rms_norm_eps": 1e-05,
+   "transformers_version": "4.28.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
first_attention_2/model_15000/training_state.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "global_step": 15000,
+   "update_step": 15000,
+   "tokens_seen": 9584689152,
+   "tokens_seen_before": 9584050176,
+   "update_time": 2.8837459087371826,
+   "wandb_id": "hkuum9kt"
+ }
first_attention_2/model_5000/config.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "architectures": [
+     "LLaMAForCausalLM"
+   ],
+   "bos_token_id": 50256,
+   "eos_token_id": 50256,
+   "hidden_act": "silu",
+   "hidden_size": 1536,
+   "intermediate_size": 5376,
+   "initializer_range": 0.02,
+   "max_sequence_length": 1024,
+   "model_type": "llama",
+   "num_attention_heads": 24,
+   "num_hidden_layers": 14,
+   "rms_norm_eps": 1e-05,
+   "transformers_version": "4.28.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
first_attention_2/model_5000/training_state.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "global_step": 5000,
+   "update_step": 5000,
+   "tokens_seen": 3194929152,
+   "tokens_seen_before": 3194290176,
+   "update_time": 2.882314920425415,
+   "wandb_id": "hkuum9kt"
+ }
first_attention_2/model_6000/config.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "architectures": [
+     "LLaMAForCausalLM"
+   ],
+   "bos_token_id": 50256,
+   "eos_token_id": 50256,
+   "hidden_act": "silu",
+   "hidden_size": 1536,
+   "intermediate_size": 5376,
+   "initializer_range": 0.02,
+   "max_sequence_length": 1024,
+   "model_type": "llama",
+   "num_attention_heads": 24,
+   "num_hidden_layers": 14,
+   "rms_norm_eps": 1e-05,
+   "transformers_version": "4.28.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
first_attention_2/model_6000/training_state.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "global_step": 6000,
+   "update_step": 6000,
+   "tokens_seen": 3833905152,
+   "tokens_seen_before": 3833266176,
+   "update_time": 2.8819949626922607,
+   "wandb_id": "hkuum9kt"
+ }
first_attention_2/model_7000/config.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "architectures": [
+     "LLaMAForCausalLM"
+   ],
+   "bos_token_id": 50256,
+   "eos_token_id": 50256,
+   "hidden_act": "silu",
+   "hidden_size": 1536,
+   "intermediate_size": 5376,
+   "initializer_range": 0.02,
+   "max_sequence_length": 1024,
+   "model_type": "llama",
+   "num_attention_heads": 24,
+   "num_hidden_layers": 14,
+   "rms_norm_eps": 1e-05,
+   "transformers_version": "4.28.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
first_attention_2/model_7000/training_state.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "global_step": 7000,
+   "update_step": 7000,
+   "tokens_seen": 4472881152,
+   "tokens_seen_before": 4472242176,
+   "update_time": 2.882638931274414,
+   "wandb_id": "hkuum9kt"
+ }
first_attention_2/model_8000/config.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "architectures": [
+     "LLaMAForCausalLM"
+   ],
+   "bos_token_id": 50256,
+   "eos_token_id": 50256,
+   "hidden_act": "silu",
+   "hidden_size": 1536,
+   "intermediate_size": 5376,
+   "initializer_range": 0.02,
+   "max_sequence_length": 1024,
+   "model_type": "llama",
+   "num_attention_heads": 24,
+   "num_hidden_layers": 14,
+   "rms_norm_eps": 1e-05,
+   "transformers_version": "4.28.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
first_attention_2/model_8000/training_state.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "global_step": 8000,
+   "update_step": 8000,
+   "tokens_seen": 5111857152,
+   "tokens_seen_before": 5111218176,
+   "update_time": 2.883155584335327,
+   "wandb_id": "hkuum9kt"
+ }
first_attention_2/model_9000/config.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "architectures": [
+     "LLaMAForCausalLM"
+   ],
+   "bos_token_id": 50256,
+   "eos_token_id": 50256,
+   "hidden_act": "silu",
+   "hidden_size": 1536,
+   "intermediate_size": 5376,
+   "initializer_range": 0.02,
+   "max_sequence_length": 1024,
+   "model_type": "llama",
+   "num_attention_heads": 24,
+   "num_hidden_layers": 14,
+   "rms_norm_eps": 1e-05,
+   "transformers_version": "4.28.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
first_attention_2/model_9000/training_state.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "global_step": 9000,
+   "update_step": 9000,
+   "tokens_seen": 5750833152,
+   "tokens_seen_before": 5750194176,
+   "update_time": 2.8817710876464844,
+   "wandb_id": "hkuum9kt"
+ }
first_attention_2_attention_unfreeze/model_1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:335ca2fea887d6351734877081fb0485012166dacdd9dffddaf45a97542c5540
+ size 3831971514
first_attention_2_attention_unfreeze/model_10000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:399f3c992ccedd62a1a627476e1fbf2da337f382748b28500155dd76f9c9c1c1
+ size 3831971514
first_attention_2_attention_unfreeze/model_12000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f433318142a54b2952f46d861bffae52d16c319c33deb9ed0885fece81e5988b
+ size 3831971514
first_attention_2_attention_unfreeze/model_13000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8231c6fc85acb8a3f9dffebbd2ef7bf2ededb60ff0e9156c834e55171b4890a0
+ size 3831971514
first_attention_2_attention_unfreeze/model_15000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b28f9756b186c271da190744e6caade888e2f28220db16562a52eca35adbecbc
+ size 3831971514
first_attention_2_attention_unfreeze/model_2000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4bf7e28193962e52d92b64d8b1191455056d8f329b5d3030920296e6d1352505
+ size 3831971514
first_attention_2_attention_unfreeze/model_3000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:df15bb40ab7fcf7f243ec79873a49d97d4d9cd0b250cb6a0564eb2c6f70afe41
+ size 3831971514
first_attention_2_attention_unfreeze/model_4000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2770afb7ed9c9c02311193c7d3448277c0abde3999b0605297c98cf592f96b2b
+ size 3831971514
first_attention_2_attention_unfreeze/model_5000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:97a4593759067a02ce02201600bc3849ffe4cbe3cb38d139843a891a86766e2d
+ size 3831971514
first_attention_2_attention_unfreeze/model_6000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:978162edef6a89660973a72eb5060715911fae80e422fc57ecb900135db49035
+ size 3831971514
first_attention_2_attention_unfreeze/model_8000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3bf28dd241e068f5023e250724d1e5627d72274f5df6ec5f2bd63031d09a40e8
+ size 3831971514
first_attention_resume_loss_convergence/model_3000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e7b78cdb11858e591dc6fac98c8ab5c1bbe9e75fb0e34ba23f7fb3e40a817dfc
+ size 3756457274
first_layer_1/model_1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5f9e122b12f54c1b0db7d1247c17b1944413ea784d421a3cc1e30d083b987997
+ size 1508846842
first_layer_1/model_1000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:aa53593417514a2af1663dabfa810205ce8806929b866234a00019623c1a402a
+ size 754421397
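
Once the LFS blobs are fetched, a weight/optimizer pair like the two first_layer_1/model_1000 files above loads directly with PyTorch; per the pointers, expect roughly 754 MB of weights and 1.5 GB of optimizer state. A minimal sketch (paths illustrative):

import torch

ckpt_dir = "first_layer_1/model_1000"

weights = torch.load(f"{ckpt_dir}/pytorch_model.bin", map_location="cpu")
optimizer_state = torch.load(f"{ckpt_dir}/optimizer.pt", map_location="cpu")
print(len(weights), "weight tensors loaded")
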
first_layer_1/model_10000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8c0741af4742da582f045d3ca5062ab1de591ac3cc6859f263f6df901b4f8fa0
+ size 1508846842
first_layer_1/model_10000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f432387b760233d555fd966f70c99c54cbe73c9c7983c45d08dbfad81f0d262e
+ size 754421397