diff --git a/arxiv/.gitattributes b/arxiv/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..f4f3945bd7150d3e12988485c42da1f8c29c59f8 --- /dev/null +++ b/arxiv/.gitattributes @@ -0,0 +1,54 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.lz4 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +# Audio files - uncompressed +*.pcm filter=lfs diff=lfs merge=lfs -text +*.sam filter=lfs diff=lfs merge=lfs -text +*.raw filter=lfs diff=lfs merge=lfs -text +# Audio files - compressed +*.aac filter=lfs diff=lfs 
merge=lfs -text +*.flac filter=lfs diff=lfs merge=lfs -text +*.mp3 filter=lfs diff=lfs merge=lfs -text +*.ogg filter=lfs diff=lfs merge=lfs -text +*.wav filter=lfs diff=lfs merge=lfs -text +# Image files - uncompressed +*.bmp filter=lfs diff=lfs merge=lfs -text +*.gif filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.tiff filter=lfs diff=lfs merge=lfs -text +# Image files - compressed +*.jpg filter=lfs diff=lfs merge=lfs -text +*.jpeg filter=lfs diff=lfs merge=lfs -text +*.webp filter=lfs diff=lfs merge=lfs -text diff --git a/arxiv/README.md b/arxiv/README.md new file mode 100644 index 0000000000000000000000000000000000000000..154df8298fab5ecf322016157858e08cd1bccbe1 --- /dev/null +++ b/arxiv/README.md @@ -0,0 +1,3 @@ +--- +license: apache-2.0 +--- diff --git a/arxiv/arxiv.py b/arxiv/arxiv.py new file mode 100644 index 0000000000000000000000000000000000000000..11b7c14ce18faf9d7b9207569c05b01d2d5c381e --- /dev/null +++ b/arxiv/arxiv.py @@ -0,0 +1,86 @@ +import io +import json +import os +from glob import glob + +import datasets +import zstandard as zstd +from datasets import GeneratorBasedBuilder +from datasets.utils import Version +from huggingface_hub import snapshot_download + +# Requires REPO_NAME and file name to be same e.g. 
# REPO_NAME and this script's file name are required to be the same
# (e.g. a script named uspto.py for the "uspto" repository).
REPO_NAME = "Multi-Domain-Expert-Layers/arxiv"


class PileDomainDataset(GeneratorBasedBuilder):
    """Dataset builder that reads zstd-compressed JSON-lines shards.

    The shards are fetched as a snapshot of the ``REPO_NAME`` dataset
    repository and are expected under ``data/train``, ``data/val`` and
    ``data/test``.  Splits whose name ends in ``_pile`` / ``_domain`` only
    read the shards whose file name contains ``pile`` / ``domain``.
    """

    VERSION = Version("1.0.0")

    def _info(self):
        """Return the dataset metadata: a single string feature, ``text``."""
        return datasets.DatasetInfo(
            description="Pile Domain Dataset",
            features=datasets.Features(
                {
                    "text": datasets.Value("string"),
                }
            ),
            supervised_keys=None,
        )

    def _split_generators(self, dl_manager):
        """Download the repository snapshot and describe every split.

        ``dl_manager`` is part of the builder interface but is not used:
        the files are fetched with ``snapshot_download`` instead.

        Returns:
            A list of ``datasets.SplitGenerator``; each one passes a
            ``data_dir`` and a ``split`` filter to ``_generate_examples``.
        """
        dl_path = snapshot_download(repo_id=REPO_NAME, repo_type="dataset")

        # (split name, sub-directory of the snapshot, file-name filter)
        split_specs = [
            (datasets.Split.TRAIN, "data/train", None),
            ("validation", "data/val", None),
            ("validation_pile", "data/val", "pile"),
            ("validation_domain", "data/val", "domain"),
            ("test_pile", "data/test", "pile"),
            ("test_domain", "data/test", "domain"),
        ]
        return [
            datasets.SplitGenerator(
                name=name,
                gen_kwargs={
                    "data_dir": os.path.join(dl_path, subdir),
                    "split": tag,
                },
            )
            for name, subdir, tag in split_specs
        ]

    def _generate_examples(self, data_dir, split):
        """Yield ``(index, record)`` pairs from the shards in ``data_dir``.

        Args:
            data_dir: Directory holding ``*.jsonl.zst`` shards.
            split: Optional substring; when not ``None`` only shards whose
                *file name* contains it are read.

        Yields:
            A running integer key (unique across all shards of the split)
            and the JSON object decoded from one line.
        """
        dctx = zstd.ZstdDecompressor()

        # glob() returns files in arbitrary, OS-dependent order; sort so the
        # example order (and therefore the keys) is reproducible.
        file_paths = sorted(glob(os.path.join(data_dir, "*.jsonl.zst")))
        if split is not None:
            # Match on the file name only: the download/cache directory may
            # itself contain "pile" or "domain", which would otherwise let
            # every shard through the filter.
            file_paths = [
                path for path in file_paths if split in os.path.basename(path)
            ]

        idx = 0
        for path in file_paths:
            with open(path, "rb") as f:
                # Iterate the buffered stream lazily instead of calling
                # readlines(), which would hold a whole decompressed shard
                # in memory at once.
                with io.BufferedReader(dctx.stream_reader(f)) as buffer:
                    for line in buffer:
                        if not line.strip():
                            # Tolerate blank lines (e.g. a trailing newline).
                            continue
                        yield idx, json.loads(line)
                        idx += 1
/dev/null +++ b/arxiv/data/test/domain_test_0.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a4ff1b67ee43c266f2ee153b56823e3bbe2b3259a7367d55e245257f9b6526f +size 15541310 diff --git a/arxiv/data/test/domain_test_1.jsonl.zst b/arxiv/data/test/domain_test_1.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..05bd1b7f5dadc162f64a2a4bcced4e074f72eac2 --- /dev/null +++ b/arxiv/data/test/domain_test_1.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c9adea90a32a01eda9695b1ea90082a04801b25f3b1c5ece9afb55f1cc6488f +size 16422435 diff --git a/arxiv/data/test/domain_test_2.jsonl.zst b/arxiv/data/test/domain_test_2.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..2cd352ab5c7e07fa81c3343078538ed72b67d8a8 --- /dev/null +++ b/arxiv/data/test/domain_test_2.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:589079fbf6c0c068816e62cf5a69ca6785ea557737dc4b6787c60400f09b02e6 +size 2331667 diff --git a/arxiv/data/test/pile_test_0.jsonl.zst b/arxiv/data/test/pile_test_0.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..795a48279cb2ee40491eff58633d1b7bc02bd3b5 --- /dev/null +++ b/arxiv/data/test/pile_test_0.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d9fd9239cab4783a1c51b0fe82fc519e3e2a076976d46d5556fd12841e21dcb +size 4767643 diff --git a/arxiv/data/train/domain_01_0.jsonl.zst b/arxiv/data/train/domain_01_0.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..e4d26ecf6a64e636b0f917d655bb46e3d6ac9d76 --- /dev/null +++ b/arxiv/data/train/domain_01_0.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fa7784a6e7ddda976f98c5aa3d2050da992b790bf5e8fd87b4bb9adc805eb36 +size 16871402 diff --git a/arxiv/data/train/domain_01_1.jsonl.zst b/arxiv/data/train/domain_01_1.jsonl.zst new file mode 100644 index 
0000000000000000000000000000000000000000..ff3efad6cee869f2cfbd9359debef2c43d885565 --- /dev/null +++ b/arxiv/data/train/domain_01_1.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df4cf867b9127d7ebb03f1a5130a3effd9fd3373c74ff2b0c37902f2f83bed6a +size 15352033 diff --git a/arxiv/data/train/domain_01_10.jsonl.zst b/arxiv/data/train/domain_01_10.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..13656308f1ce2ed4b27156adaaf9e8b4a98e2680 --- /dev/null +++ b/arxiv/data/train/domain_01_10.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c74f2e23b77b1f2c00077c93268019e4e5582bf2a3be76877a66c3fb64037f37 +size 16393690 diff --git a/arxiv/data/train/domain_01_11.jsonl.zst b/arxiv/data/train/domain_01_11.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..cc3306d6a1df6c60173a00a9d0529735276272c2 --- /dev/null +++ b/arxiv/data/train/domain_01_11.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6010834b6472fc68e5bd523c9edc0f658a7204f3123e6fac5e50e9bc621f91c +size 15786441 diff --git a/arxiv/data/train/domain_01_12.jsonl.zst b/arxiv/data/train/domain_01_12.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..a0e5b6bf16f8cccb980f66ba0e5a37bd73243ab9 --- /dev/null +++ b/arxiv/data/train/domain_01_12.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a51c60f08e2428fe750d69e54104c64c7c69f0507c930e8e419aac7e0688d73 +size 15756228 diff --git a/arxiv/data/train/domain_01_13.jsonl.zst b/arxiv/data/train/domain_01_13.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..5d8b15a50e9b0a841c874909ab3a79406200ee6a --- /dev/null +++ b/arxiv/data/train/domain_01_13.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e093e5bb2d254c092e7658a61be8880e1e96b95da97a67ad96061472565c1ad7 +size 16859851 diff --git 
a/arxiv/data/train/domain_01_14.jsonl.zst b/arxiv/data/train/domain_01_14.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..8196a9c43507dba4665e9e53346ed59fe1548f68 --- /dev/null +++ b/arxiv/data/train/domain_01_14.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:427a16237fcc15575746a7e66d54d7561c1389f62f228365934b45d2ca94ec0f +size 17896454 diff --git a/arxiv/data/train/domain_01_15.jsonl.zst b/arxiv/data/train/domain_01_15.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..1510f7963818710aa98809d566c032f86226e7c3 --- /dev/null +++ b/arxiv/data/train/domain_01_15.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdc9c50d29448ed57d188e606347bb4b50bd46a670cd03774f6cb50fc7844fc7 +size 16069013 diff --git a/arxiv/data/train/domain_01_16.jsonl.zst b/arxiv/data/train/domain_01_16.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..a7ad7bd1418cb88b3073698ab19dca24802c69cf --- /dev/null +++ b/arxiv/data/train/domain_01_16.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18af5ffafac3eb9cdb8c11272e8a14dbdd54a20891616bfe405edc0c652e2029 +size 16498615 diff --git a/arxiv/data/train/domain_01_17.jsonl.zst b/arxiv/data/train/domain_01_17.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..c270a8c5371a824bba8e2e4a846a6231554ce306 --- /dev/null +++ b/arxiv/data/train/domain_01_17.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40228d039aff5bfcebb145047b1cc25a4ca2df6ed1679eae3af7434bb7e007c0 +size 17044770 diff --git a/arxiv/data/train/domain_01_18.jsonl.zst b/arxiv/data/train/domain_01_18.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..90092b5508b60d485709c703260d9e9c5644c96a --- /dev/null +++ b/arxiv/data/train/domain_01_18.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:64bf682136e6c27b8f99d3cbbff1e256f8f4a5628ac8e5a2aca637823ea5690d +size 15104542 diff --git a/arxiv/data/train/domain_01_19.jsonl.zst b/arxiv/data/train/domain_01_19.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..b7100e8950e33da6c92030c499f427537033d601 --- /dev/null +++ b/arxiv/data/train/domain_01_19.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd6ad38796f194462e155fb9cfad54b6d9e90391dda762dc9642ec44857329d1 +size 17246074 diff --git a/arxiv/data/train/domain_01_2.jsonl.zst b/arxiv/data/train/domain_01_2.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..be8589ace01dc6b6b618af8551ee73e0401c718c --- /dev/null +++ b/arxiv/data/train/domain_01_2.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f9323ee5524b55b96cafd887aa8915f508c1cfcddae2648c4fc399f260c3863 +size 16944159 diff --git a/arxiv/data/train/domain_01_20.jsonl.zst b/arxiv/data/train/domain_01_20.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..9b26d2cb0f5effc9f6f49f69b3230f494d1512d5 --- /dev/null +++ b/arxiv/data/train/domain_01_20.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b82f1b76d0f8ce4acb4ca51a761b5941af6e29344ca84262bf8f5f689da904aa +size 17583685 diff --git a/arxiv/data/train/domain_01_21.jsonl.zst b/arxiv/data/train/domain_01_21.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..784e4ece76f7c5c03779432a51fc83dbc82b2b4f --- /dev/null +++ b/arxiv/data/train/domain_01_21.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2770439adc730e3959cdfc7b8c0dc0fa46483ebc95695bbdc2b779d63cb351f0 +size 16201201 diff --git a/arxiv/data/train/domain_01_22.jsonl.zst b/arxiv/data/train/domain_01_22.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..2204e460c78e6b98f2b7e419530e679c5a4f640e --- /dev/null +++ 
b/arxiv/data/train/domain_01_22.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbe81e8bd075be385087cc91b929ed84d6ac5656b453f5aa4da88cc9fef6e231 +size 15875636 diff --git a/arxiv/data/train/domain_01_23.jsonl.zst b/arxiv/data/train/domain_01_23.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..f958d78925a3b8b9dd5f0798977ee5aec31bac66 --- /dev/null +++ b/arxiv/data/train/domain_01_23.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bae0734d4c57e5f5eb14e7633f5723fbf814c42a3c6decdc40fec7cdff8919b +size 15613038 diff --git a/arxiv/data/train/domain_01_24.jsonl.zst b/arxiv/data/train/domain_01_24.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..030a8e75f239acb74ccfe88362d154099716f375 --- /dev/null +++ b/arxiv/data/train/domain_01_24.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e14ac8267c9b9876a0af691071b1100fe69bf41c312e4567655896a243638305 +size 16030334 diff --git a/arxiv/data/train/domain_01_25.jsonl.zst b/arxiv/data/train/domain_01_25.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..6f979c0d1d88068bdb46738061a3f3a813f80c26 --- /dev/null +++ b/arxiv/data/train/domain_01_25.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fefdb886deb0b278d5ac0ae908c139f1f681f43337bed2dc09514c150c592f0 +size 16306303 diff --git a/arxiv/data/train/domain_01_26.jsonl.zst b/arxiv/data/train/domain_01_26.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..dfc1509aa31ca8a311ae6b3a47105dd29b32ec1f --- /dev/null +++ b/arxiv/data/train/domain_01_26.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1649a382fb76b0aa99f720aca441e85f0b3fc242f5ce9c02497dd34d8b47727 +size 16568164 diff --git a/arxiv/data/train/domain_01_27.jsonl.zst b/arxiv/data/train/domain_01_27.jsonl.zst new file mode 100644 index 
0000000000000000000000000000000000000000..9239428c1f984df8adf74bb064e6c0d96277e8b3 --- /dev/null +++ b/arxiv/data/train/domain_01_27.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b524ec3a96c0fef7975dabadcc4fbcea3c12b484b2272494ca1f191b6784d635 +size 16678421 diff --git a/arxiv/data/train/domain_01_28.jsonl.zst b/arxiv/data/train/domain_01_28.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..48848890d438ef2addb560cf097d20cd92f14814 --- /dev/null +++ b/arxiv/data/train/domain_01_28.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e81588a43a28768ef709c9d67598c438058981aacad179da441eb612c4d0973e +size 15789654 diff --git a/arxiv/data/train/domain_01_29.jsonl.zst b/arxiv/data/train/domain_01_29.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..a85c2e3a14ddb6ad4f0c6f767ef23a1875c4442f --- /dev/null +++ b/arxiv/data/train/domain_01_29.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e607129ab676eee06f95cb1ec3718b6228626c9d5c87c6a67a8ededaff0db7d3 +size 16599834 diff --git a/arxiv/data/train/domain_01_3.jsonl.zst b/arxiv/data/train/domain_01_3.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..0a997fb9b8f9658e28f2c8909436ceaad5f6bb09 --- /dev/null +++ b/arxiv/data/train/domain_01_3.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2e836f36c40c1a7d6a51b2420d78d34e805b888bf918625afc2077584d50450 +size 16454989 diff --git a/arxiv/data/train/domain_01_30.jsonl.zst b/arxiv/data/train/domain_01_30.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..a2c3c1f94bdd0b388764fd33c2c02fe68ff08012 --- /dev/null +++ b/arxiv/data/train/domain_01_30.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b79204a9dfe531e5b9023ec9ffb3b464eaf58731f4faf735f43ed4cb2d8c0dfd +size 17781721 diff --git 
a/arxiv/data/train/domain_01_31.jsonl.zst b/arxiv/data/train/domain_01_31.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..03a2c1780287a9da4254210c30e4067e8339aeef --- /dev/null +++ b/arxiv/data/train/domain_01_31.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2f60da479dd18f0000d2adddce34cecf3956ce65bbfdbdbd3652c23d2f81e53 +size 16272841 diff --git a/arxiv/data/train/domain_01_32.jsonl.zst b/arxiv/data/train/domain_01_32.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..218e7a17ef30a6629408f0ba6763b3b5acdfdd27 --- /dev/null +++ b/arxiv/data/train/domain_01_32.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5986a14b8c7ea4b81aa02e100d717b12df1e43177ebd593ea5a062c5cdeffc7 +size 16889257 diff --git a/arxiv/data/train/domain_01_33.jsonl.zst b/arxiv/data/train/domain_01_33.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..9eddea4578860906a6959a53ea126dc64b1bad87 --- /dev/null +++ b/arxiv/data/train/domain_01_33.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8c45ef33a542836c2da4e580090d99c117acb2b9d9512771be1df58fa1a7ddd +size 15775554 diff --git a/arxiv/data/train/domain_01_34.jsonl.zst b/arxiv/data/train/domain_01_34.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..6868740ac18865f87a673f9ca56c6150d576f012 --- /dev/null +++ b/arxiv/data/train/domain_01_34.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fff0b093f0688a15aeebefe36e58f709e1a082a46906e0c850492a7ab6334ac6 +size 15734091 diff --git a/arxiv/data/train/domain_01_35.jsonl.zst b/arxiv/data/train/domain_01_35.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..24a365816315efcb0c2785a9260290bbfb98f0c7 --- /dev/null +++ b/arxiv/data/train/domain_01_35.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:078600339faf3453218f2312756609c2650cc6f9ea2b339d85528c107a9d0888 +size 14896816 diff --git a/arxiv/data/train/domain_01_36.jsonl.zst b/arxiv/data/train/domain_01_36.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..18accb314e7e29145ad78c0d6cbecc59c427a79e --- /dev/null +++ b/arxiv/data/train/domain_01_36.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0937b5959087fada1811a31becfd9df6b2fc1cb51dee59a2541db7ba445aea63 +size 17199786 diff --git a/arxiv/data/train/domain_01_37.jsonl.zst b/arxiv/data/train/domain_01_37.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..66bc0f59594629b1d591bb923a2952933b1bbe10 --- /dev/null +++ b/arxiv/data/train/domain_01_37.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0372cbb8ef785ed60e4cfdaf40428b15d8228027b9009eaa2cd7c6d800dfc996 +size 16699483 diff --git a/arxiv/data/train/domain_01_38.jsonl.zst b/arxiv/data/train/domain_01_38.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..47589b7ffea0e7405a0538f030f9d3ab7094acda --- /dev/null +++ b/arxiv/data/train/domain_01_38.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a5549fe18fa8d3d1d473cb8af7cf470813bcb84b5a05e5f189ac311a6a2af29 +size 17512444 diff --git a/arxiv/data/train/domain_01_39.jsonl.zst b/arxiv/data/train/domain_01_39.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..54f0035d43a03c5a6fc9e23da803ff51a6cd2afa --- /dev/null +++ b/arxiv/data/train/domain_01_39.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3133fc8c983dd3e8ff5b5dcb581a1dbc131bdf37d639f540921f8bce2fac5c8f +size 16086076 diff --git a/arxiv/data/train/domain_01_4.jsonl.zst b/arxiv/data/train/domain_01_4.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..2d8f907de80fe6036d3fb846696179d84d275396 --- /dev/null +++ 
b/arxiv/data/train/domain_01_4.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0089611d6243813bbbec15dd74c9149d08356ba1ba044e42937439c780a4158 +size 16418274 diff --git a/arxiv/data/train/domain_01_40.jsonl.zst b/arxiv/data/train/domain_01_40.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..0c357d2b69fe74e2300e74c97667b2437e821956 --- /dev/null +++ b/arxiv/data/train/domain_01_40.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0854f772f7b9b3da4f89d4ea6f34a5dbae1184f5bbae85b6b6caa112f98e9607 +size 17721869 diff --git a/arxiv/data/train/domain_01_41.jsonl.zst b/arxiv/data/train/domain_01_41.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..44d834da96db332a3ff759119063dcadcacb03b3 --- /dev/null +++ b/arxiv/data/train/domain_01_41.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a544003097e158425af89961090c61209ed90cca7ea0c96afbffc79a529f277c +size 16088305 diff --git a/arxiv/data/train/domain_01_42.jsonl.zst b/arxiv/data/train/domain_01_42.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..263f6c95d4926ff1a3be06bc3eda3ec2c2f0f519 --- /dev/null +++ b/arxiv/data/train/domain_01_42.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03dc5e33e2910333ec8f1f2ecf8c68f677f733e033342173dc3088ca6e402682 +size 17629572 diff --git a/arxiv/data/train/domain_01_43.jsonl.zst b/arxiv/data/train/domain_01_43.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..5a4eb2863337586f5b51958faf1792b4c43dce1f --- /dev/null +++ b/arxiv/data/train/domain_01_43.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36e31c06b0027783ec3e46c1270beadd99b354723b7e23c1a7d4630c002f9bab +size 16331519 diff --git a/arxiv/data/train/domain_01_44.jsonl.zst b/arxiv/data/train/domain_01_44.jsonl.zst new file mode 100644 index 
0000000000000000000000000000000000000000..897d8070986b4bb6149c6e1aa3bc9a49207946cc --- /dev/null +++ b/arxiv/data/train/domain_01_44.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76d1db760a90458505795db09537d8ed70916c0e648726d3eaa05d4c73ec7086 +size 15958693 diff --git a/arxiv/data/train/domain_01_45.jsonl.zst b/arxiv/data/train/domain_01_45.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..71dc7261840df7b75c570c227fbadce6370a1abf --- /dev/null +++ b/arxiv/data/train/domain_01_45.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a108f61499b7b94f9deb85ea834b999c89254d158e24682422166431fa9690b +size 15578437 diff --git a/arxiv/data/train/domain_01_46.jsonl.zst b/arxiv/data/train/domain_01_46.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..a455648ef3e799e2ba4acce305d5e2d3e4e1838a --- /dev/null +++ b/arxiv/data/train/domain_01_46.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d17b104276159bcff93aa94fbf37946973e67c8391f279f96b98b389934aa803 +size 16044730 diff --git a/arxiv/data/train/domain_01_47.jsonl.zst b/arxiv/data/train/domain_01_47.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..e277516b44ee185ff520543b83dd42bfc3e839e8 --- /dev/null +++ b/arxiv/data/train/domain_01_47.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43388236eb1ef112f15aa20bac18b307d4000d3dd1ce10b892d77e42dd0a164b +size 16537221 diff --git a/arxiv/data/train/domain_01_48.jsonl.zst b/arxiv/data/train/domain_01_48.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..f72ef1002a139a59280d558c472b90917d4e71d4 --- /dev/null +++ b/arxiv/data/train/domain_01_48.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d57d2edf921382ad704d2d7a761145e710dcfc106e212329b3c626fa25178641 +size 15112940 diff --git 
a/arxiv/data/train/domain_01_49.jsonl.zst b/arxiv/data/train/domain_01_49.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..4a4ba9920acee836f3c59ba09ae6a0392264d131 --- /dev/null +++ b/arxiv/data/train/domain_01_49.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:811cd77033f37d52df58fe7ddaf6acdb662bb4b8b1f3083e513fcd549098f660 +size 17067851 diff --git a/arxiv/data/train/domain_01_5.jsonl.zst b/arxiv/data/train/domain_01_5.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..288a8ee9c7aac19a631217aaf1e3f88944175890 --- /dev/null +++ b/arxiv/data/train/domain_01_5.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:450dde2d075a27c5ecb8390ccf9f9002303b41e947be3d4282684e261c4884d6 +size 16386362 diff --git a/arxiv/data/train/domain_01_50.jsonl.zst b/arxiv/data/train/domain_01_50.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..97f640a3d37fbd049e806f97803b775c31928506 --- /dev/null +++ b/arxiv/data/train/domain_01_50.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bbc8e86938c67cd74a6f2ad532c7345401a6ef4181d6a14a6fac35fde57d74a +size 15947884 diff --git a/arxiv/data/train/domain_01_51.jsonl.zst b/arxiv/data/train/domain_01_51.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..60d8bdfbc31beb542f2eda681ab93c4b3b4a9b40 --- /dev/null +++ b/arxiv/data/train/domain_01_51.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:411e52899401d97b3f0c9a10d216d6417225886922101a82ff34324f318a197e +size 16669356 diff --git a/arxiv/data/train/domain_01_52.jsonl.zst b/arxiv/data/train/domain_01_52.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..19aec5326da54d024987c6929bf10400928e2194 --- /dev/null +++ b/arxiv/data/train/domain_01_52.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:1c1b9c7ec533506674564d535d65f2dc6e695f0f4884439c8297f0c359ae4808 +size 15425242 diff --git a/arxiv/data/train/domain_01_53.jsonl.zst b/arxiv/data/train/domain_01_53.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..d352364a697ec8565cce21f093f8698d08d998ae --- /dev/null +++ b/arxiv/data/train/domain_01_53.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4fb43f5089d14f084abc3516eac597aa377f3b933c4289662574f7d40b7d455 +size 16694552 diff --git a/arxiv/data/train/domain_01_54.jsonl.zst b/arxiv/data/train/domain_01_54.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..393a7428b7dede58afd768fc3eb9581e83ebe14a --- /dev/null +++ b/arxiv/data/train/domain_01_54.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1c894d9a636e9bf3f7ffdbf2cda9b55ddc3c2448eb92871a655d2cae4811c13 +size 16195661 diff --git a/arxiv/data/train/domain_01_55.jsonl.zst b/arxiv/data/train/domain_01_55.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..5fc330fe48ebefa4b4a3bc5b2d606706779cf3e7 --- /dev/null +++ b/arxiv/data/train/domain_01_55.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59e171d3da98523592fa549b3f55d4c4504f90303a3adf0519e63d22e3cd7a2e +size 17320155 diff --git a/arxiv/data/train/domain_01_56.jsonl.zst b/arxiv/data/train/domain_01_56.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..fdf55a92142edb1ac34b50d7e16ff556002cbd3a --- /dev/null +++ b/arxiv/data/train/domain_01_56.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:349f18ea9a0d38e9261f915b89a10acfc7cf0e7d92f62da5a23abf3914a7c144 +size 16840146 diff --git a/arxiv/data/train/domain_01_57.jsonl.zst b/arxiv/data/train/domain_01_57.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..bfe7491cf75614683a1622067a6e950e0c1a333f --- /dev/null +++ 
b/arxiv/data/train/domain_01_57.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8369ca0214b6ac2030439d2c66b64fb21905a7b749318f9ff599478093da4418 +size 16352997 diff --git a/arxiv/data/train/domain_01_58.jsonl.zst b/arxiv/data/train/domain_01_58.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..aa7a4843c1ed21a609abb2cfe91cde0cf024c516 --- /dev/null +++ b/arxiv/data/train/domain_01_58.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06dfd32c4a3bf283ed0ac7c7b991c29f041716c82c038a0c671b7fbc4edf90d6 +size 16640241 diff --git a/arxiv/data/train/domain_01_59.jsonl.zst b/arxiv/data/train/domain_01_59.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..c4f3381a190348d9932dd248a3d08884c1f54ac4 --- /dev/null +++ b/arxiv/data/train/domain_01_59.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9832a871301671a7704e9c399384891582eb7bebad58ff3fb50096eea4af5d6f +size 16945109 diff --git a/arxiv/data/train/domain_01_6.jsonl.zst b/arxiv/data/train/domain_01_6.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..808a0754598f3288320a666f70b8ecdf5ce7fbb3 --- /dev/null +++ b/arxiv/data/train/domain_01_6.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68641c5ad80442729099f25ba958ef35c3cc03c798111d81e35afee44feb3d01 +size 16930225 diff --git a/arxiv/data/train/domain_01_60.jsonl.zst b/arxiv/data/train/domain_01_60.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..fa0833e0e73521d3b7da69fdf01877283df302f0 --- /dev/null +++ b/arxiv/data/train/domain_01_60.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af88561043b4b8d2f115249d5f068f83590ecfbc449766b4b37f5da172bedd7d +size 15541951 diff --git a/arxiv/data/train/domain_01_61.jsonl.zst b/arxiv/data/train/domain_01_61.jsonl.zst new file mode 100644 index 
0000000000000000000000000000000000000000..1b56ed3374bfce438b69444ae6c409cf104c25de --- /dev/null +++ b/arxiv/data/train/domain_01_61.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6afad3872bebb8ba2cb90fcc5677d98c7a8b0987094b6c277ce8c0e1f254c268 +size 17106357 diff --git a/arxiv/data/train/domain_01_62.jsonl.zst b/arxiv/data/train/domain_01_62.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..abad1541c0777fc32983b8aab89afa27dde65a31 --- /dev/null +++ b/arxiv/data/train/domain_01_62.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee6ba68a83b8b79d4e8ef571812842041e6176b54251fab9c6d24e70934b9e97 +size 15110706 diff --git a/arxiv/data/train/domain_01_63.jsonl.zst b/arxiv/data/train/domain_01_63.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..1427a0fda46740abd702d4392b127e08980a3d9f --- /dev/null +++ b/arxiv/data/train/domain_01_63.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0ce0d2390b6aca7e371d963c783404f032dd0d04960de224fa48c53682606c3 +size 16389843 diff --git a/arxiv/data/train/domain_01_64.jsonl.zst b/arxiv/data/train/domain_01_64.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..c4d3f23d1c62a8f8f516634a27e514a37ed9dada --- /dev/null +++ b/arxiv/data/train/domain_01_64.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f12a5cbd6bff4cafefa06a8e7400a9cfe33b9e46e949f91aa0555d8e50e36ef5 +size 16890086 diff --git a/arxiv/data/train/domain_01_65.jsonl.zst b/arxiv/data/train/domain_01_65.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..05c19538ebdd4ce7fdb2677a03e7cf9a333ba4b5 --- /dev/null +++ b/arxiv/data/train/domain_01_65.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f1cdc7dcd9f78bdfe6dae8680c1c416955a1ce8aa7ba49a01bd333149a89bbf +size 15457547 diff --git 
a/arxiv/data/train/domain_01_66.jsonl.zst b/arxiv/data/train/domain_01_66.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..90bce2f4d01777b2cb263e6a0854031067421533 --- /dev/null +++ b/arxiv/data/train/domain_01_66.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccc634f5f0668c5eba5e3ff53f694987c4eafe935fc3bec392e732ba11e5fcf5 +size 15958745 diff --git a/arxiv/data/train/domain_01_67.jsonl.zst b/arxiv/data/train/domain_01_67.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..aee592bd2a96ed31e3338657f6e2211627fccc2e --- /dev/null +++ b/arxiv/data/train/domain_01_67.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7aed82c1e66185f431f3817dc8984f44f161d1b3755dfff6dc2877455bd377ef +size 15781649 diff --git a/arxiv/data/train/domain_01_68.jsonl.zst b/arxiv/data/train/domain_01_68.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..f77a54fc44640ec10fe09a3fac0c3292e072f453 --- /dev/null +++ b/arxiv/data/train/domain_01_68.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c9c1d6d622f0ca9ca196cf313cbfe8aaae08a9ee62d15711530bb2fd5038a67 +size 15786700 diff --git a/arxiv/data/train/domain_01_69.jsonl.zst b/arxiv/data/train/domain_01_69.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..e2b441c9474c857807d703adbd4aa8485bb52246 --- /dev/null +++ b/arxiv/data/train/domain_01_69.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3a867fdfd1b093b659e12c8c891785865721af90e8bdce47245a52ed7abcf66 +size 16126537 diff --git a/arxiv/data/train/domain_01_7.jsonl.zst b/arxiv/data/train/domain_01_7.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..9346e187eab42d52d9951bfec1ce7c756f10ace9 --- /dev/null +++ b/arxiv/data/train/domain_01_7.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:489161e080917327379c15bdb90f5c805336fe9d049fcb5af1b88207d151a3f7 +size 16247937 diff --git a/arxiv/data/train/domain_01_70.jsonl.zst b/arxiv/data/train/domain_01_70.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..a8436ff73bbcb8b34b11d89497794bc73382e581 --- /dev/null +++ b/arxiv/data/train/domain_01_70.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7676308539100ed048600d3f3f031b42a0f3746c46e42b8ded94feae53f5d906 +size 3737108 diff --git a/arxiv/data/train/domain_01_8.jsonl.zst b/arxiv/data/train/domain_01_8.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..b78a0bb743f2e2bebe0a220703d40bcc3688559f --- /dev/null +++ b/arxiv/data/train/domain_01_8.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7eb35af0721e4ea5504db0ddb9b0d027bce80ee92240d69f0ad8261bf5fab534 +size 16095290 diff --git a/arxiv/data/train/domain_01_9.jsonl.zst b/arxiv/data/train/domain_01_9.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..1007fa5b99adc4a1af6ef25ca5e86363624c98b7 --- /dev/null +++ b/arxiv/data/train/domain_01_9.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7261c353827d39a360ae14988d44834c73e1ebf4e0a0fe41aa16078311aea963 +size 15671066 diff --git a/arxiv/data/train/pile_01_40.jsonl.zst b/arxiv/data/train/pile_01_40.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..801adf3f2d7ad1b49a2c57ff77394507e6c41fdd --- /dev/null +++ b/arxiv/data/train/pile_01_40.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55588bdac6c93cb08a552b284792a9437229d598cecbc0f7b4bc4f49ea4a12af +size 169520093 diff --git a/arxiv/data/val/domain_val_0.jsonl.zst b/arxiv/data/val/domain_val_0.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..aae06acff90bc7f1b17bd193a41b4f80f04733fb --- /dev/null +++ 
b/arxiv/data/val/domain_val_0.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5450b4b1baded6d8bdafe6c6f1532ba0826872161af9b6abd56d006190e6472 +size 16085000 diff --git a/arxiv/data/val/domain_val_1.jsonl.zst b/arxiv/data/val/domain_val_1.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..2e39af8553726afa73decba76b76ee3fb9d6d820 --- /dev/null +++ b/arxiv/data/val/domain_val_1.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92aa2fac6909d38154d7e05ded427afd380fa9757eacb06dc726f052e94046fe +size 16363202 diff --git a/arxiv/data/val/domain_val_2.jsonl.zst b/arxiv/data/val/domain_val_2.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..9b9826613771a8b17e12276a79d82122caf45b25 --- /dev/null +++ b/arxiv/data/val/domain_val_2.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93e48ee7958f90297948412ffdb1c482bcfeb9032b7de7d8e37dd031a2e2c78e +size 2326502 diff --git a/arxiv/data/val/pile_val_2.jsonl.zst b/arxiv/data/val/pile_val_2.jsonl.zst new file mode 100644 index 0000000000000000000000000000000000000000..94d224a50d823ad19d12a8bbd45441190bece9b9 --- /dev/null +++ b/arxiv/data/val/pile_val_2.jsonl.zst @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9a05c6e32caa03aebdfd4752c7be51d737b1dcabcbb982377a836226d151502 +size 4606117