mrtzh committed on
Commit
f2ffdac
1 Parent(s): 2e7f207

Upload 15 files

Browse files
README.md CHANGED
@@ -1,3 +1,3 @@
1
- ---
2
- license: cc-by-nc-4.0
3
- ---
 
1
+ The standard `roberta-large` model, fine-tuned for one pass over the entire Pile dataset.
2
+
3
+ See [Test-time training on nearest neighbors for large language models](https://github.com/socialfoundations/tttlm) for details.
config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "roberta-large",
3
+ "architectures": [
4
+ "RobertaForMaskedLM"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 1024,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 4096,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 514,
17
+ "model_type": "roberta",
18
+ "num_attention_heads": 16,
19
+ "num_hidden_layers": 24,
20
+ "pad_token_id": 1,
21
+ "position_embedding_type": "absolute",
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.25.1",
24
+ "type_vocab_size": 1,
25
+ "use_cache": true,
26
+ "vocab_size": 50265
27
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e93fbf2fd68b6572bfd62ccc63a4d5bf877878d1cffeb3978157d339eb175872
3
+ size 2843625531
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c4a06cb76703caa40f42b9a5a2263ec090854ca9496eb0c6d48d92d40d9e058
3
+ size 1421788537
rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a3cf857cde4fccedf7ddc8ea22764f0ccabfa7dd77d54a44821ae4f7f138a59
3
+ size 14583
rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0240d913625771daef0627984797c952e5b72ced066a97a350688dfb3186d38b
3
+ size 14583
rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:142cfd70028fac33c13199cef217f3630af8c86d5c371ff95e21fa767b8e54da
3
+ size 14583
rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23e62c20142cb8fe17074069781f1d17fef8071386188b3a52366d9676e55ec6
3
+ size 14583
rng_state_4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7de99ed9d4b74f79434451023e1e1132fb2a7fdf4c30205833d363fe03d52fa
3
+ size 14583
rng_state_5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45fcc54ac4c5bedf883c93cdba5f32f6ddaf67137d0647bf78bf8390af34f86d
3
+ size 14583
rng_state_6.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba32ebab38c78325440382eca50aaa4dffa28f3757ffaa5859b438fe8973e4a7
3
+ size 14583
rng_state_7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b20e26914fe2908dcdd6602388e794a0e192becf782a0452606f74b61b075ed
3
+ size 14583
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c302f59942612bd66fd76eacfaa809629c35b3379073cdf046103c10bf6bfde
3
+ size 627
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8113701856eb55a410385c432dba5363eb404dfe0b164257d9a2c1ba3ba87409
3
+ size 3451