Upload 15 files

Files changed (15) hide show

README.md CHANGED Viewed

@@ -1,3 +1,3 @@
----
-license: cc-by-nc-4.0
----


1	+ Standard `roberta-large` model, fine-tuned for a single pass (one epoch) over the entire Pile dataset.
2	+
3	+ See [Test-time training on nearest neighbors for large language models](https://github.com/socialfoundations/tttlm) for details.

config.json ADDED Viewed

+{
+  "_name_or_path": "roberta-large",
+  "architectures": [
+    "RobertaForMaskedLM"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.25.1",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 50265
+}

optimizer.pt ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:e93fbf2fd68b6572bfd62ccc63a4d5bf877878d1cffeb3978157d339eb175872
+size 2843625531

pytorch_model.bin ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:3c4a06cb76703caa40f42b9a5a2263ec090854ca9496eb0c6d48d92d40d9e058
+size 1421788537

rng_state_0.pth ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:6a3cf857cde4fccedf7ddc8ea22764f0ccabfa7dd77d54a44821ae4f7f138a59
+size 14583

rng_state_1.pth ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:0240d913625771daef0627984797c952e5b72ced066a97a350688dfb3186d38b
+size 14583

rng_state_2.pth ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:142cfd70028fac33c13199cef217f3630af8c86d5c371ff95e21fa767b8e54da
+size 14583

rng_state_3.pth ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:23e62c20142cb8fe17074069781f1d17fef8071386188b3a52366d9676e55ec6
+size 14583

rng_state_4.pth ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:e7de99ed9d4b74f79434451023e1e1132fb2a7fdf4c30205833d363fe03d52fa
+size 14583

rng_state_5.pth ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:45fcc54ac4c5bedf883c93cdba5f32f6ddaf67137d0647bf78bf8390af34f86d
+size 14583

rng_state_6.pth ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:ba32ebab38c78325440382eca50aaa4dffa28f3757ffaa5859b438fe8973e4a7
+size 14583

rng_state_7.pth ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:2b20e26914fe2908dcdd6602388e794a0e192becf782a0452606f74b61b075ed
+size 14583

scheduler.pt ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:2c302f59942612bd66fd76eacfaa809629c35b3379073cdf046103c10bf6bfde
+size 627

trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:8113701856eb55a410385c432dba5363eb404dfe0b164257d9a2c1ba3ba87409
+size 3451