Upload folder using huggingface_hub
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- arxiv/.gitattributes +54 -0
- arxiv/README.md +3 -0
- arxiv/arxiv.py +86 -0
- arxiv/data/test/domain_test_0.jsonl.zst +3 -0
- arxiv/data/test/domain_test_1.jsonl.zst +3 -0
- arxiv/data/test/domain_test_2.jsonl.zst +3 -0
- arxiv/data/test/pile_test_0.jsonl.zst +3 -0
- arxiv/data/train/domain_01_0.jsonl.zst +3 -0
- arxiv/data/train/domain_01_1.jsonl.zst +3 -0
- arxiv/data/train/domain_01_10.jsonl.zst +3 -0
- arxiv/data/train/domain_01_11.jsonl.zst +3 -0
- arxiv/data/train/domain_01_12.jsonl.zst +3 -0
- arxiv/data/train/domain_01_13.jsonl.zst +3 -0
- arxiv/data/train/domain_01_14.jsonl.zst +3 -0
- arxiv/data/train/domain_01_15.jsonl.zst +3 -0
- arxiv/data/train/domain_01_16.jsonl.zst +3 -0
- arxiv/data/train/domain_01_17.jsonl.zst +3 -0
- arxiv/data/train/domain_01_18.jsonl.zst +3 -0
- arxiv/data/train/domain_01_19.jsonl.zst +3 -0
- arxiv/data/train/domain_01_2.jsonl.zst +3 -0
- arxiv/data/train/domain_01_20.jsonl.zst +3 -0
- arxiv/data/train/domain_01_21.jsonl.zst +3 -0
- arxiv/data/train/domain_01_22.jsonl.zst +3 -0
- arxiv/data/train/domain_01_23.jsonl.zst +3 -0
- arxiv/data/train/domain_01_24.jsonl.zst +3 -0
- arxiv/data/train/domain_01_25.jsonl.zst +3 -0
- arxiv/data/train/domain_01_26.jsonl.zst +3 -0
- arxiv/data/train/domain_01_27.jsonl.zst +3 -0
- arxiv/data/train/domain_01_28.jsonl.zst +3 -0
- arxiv/data/train/domain_01_29.jsonl.zst +3 -0
- arxiv/data/train/domain_01_3.jsonl.zst +3 -0
- arxiv/data/train/domain_01_30.jsonl.zst +3 -0
- arxiv/data/train/domain_01_31.jsonl.zst +3 -0
- arxiv/data/train/domain_01_32.jsonl.zst +3 -0
- arxiv/data/train/domain_01_33.jsonl.zst +3 -0
- arxiv/data/train/domain_01_34.jsonl.zst +3 -0
- arxiv/data/train/domain_01_35.jsonl.zst +3 -0
- arxiv/data/train/domain_01_36.jsonl.zst +3 -0
- arxiv/data/train/domain_01_37.jsonl.zst +3 -0
- arxiv/data/train/domain_01_38.jsonl.zst +3 -0
- arxiv/data/train/domain_01_39.jsonl.zst +3 -0
- arxiv/data/train/domain_01_4.jsonl.zst +3 -0
- arxiv/data/train/domain_01_40.jsonl.zst +3 -0
- arxiv/data/train/domain_01_41.jsonl.zst +3 -0
- arxiv/data/train/domain_01_42.jsonl.zst +3 -0
- arxiv/data/train/domain_01_43.jsonl.zst +3 -0
- arxiv/data/train/domain_01_44.jsonl.zst +3 -0
- arxiv/data/train/domain_01_45.jsonl.zst +3 -0
- arxiv/data/train/domain_01_46.jsonl.zst +3 -0
- arxiv/data/train/domain_01_47.jsonl.zst +3 -0
arxiv/.gitattributes
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.lz4 filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
26 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
27 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
# Audio files - uncompressed
|
37 |
+
*.pcm filter=lfs diff=lfs merge=lfs -text
|
38 |
+
*.sam filter=lfs diff=lfs merge=lfs -text
|
39 |
+
*.raw filter=lfs diff=lfs merge=lfs -text
|
40 |
+
# Audio files - compressed
|
41 |
+
*.aac filter=lfs diff=lfs merge=lfs -text
|
42 |
+
*.flac filter=lfs diff=lfs merge=lfs -text
|
43 |
+
*.mp3 filter=lfs diff=lfs merge=lfs -text
|
44 |
+
*.ogg filter=lfs diff=lfs merge=lfs -text
|
45 |
+
*.wav filter=lfs diff=lfs merge=lfs -text
|
46 |
+
# Image files - uncompressed
|
47 |
+
*.bmp filter=lfs diff=lfs merge=lfs -text
|
48 |
+
*.gif filter=lfs diff=lfs merge=lfs -text
|
49 |
+
*.png filter=lfs diff=lfs merge=lfs -text
|
50 |
+
*.tiff filter=lfs diff=lfs merge=lfs -text
|
51 |
+
# Image files - compressed
|
52 |
+
*.jpg filter=lfs diff=lfs merge=lfs -text
|
53 |
+
*.jpeg filter=lfs diff=lfs merge=lfs -text
|
54 |
+
*.webp filter=lfs diff=lfs merge=lfs -text
|
arxiv/README.md
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: apache-2.0
|
3 |
+
---
|
arxiv/arxiv.py
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import io
|
2 |
+
import json
|
3 |
+
import os
|
4 |
+
from glob import glob
|
5 |
+
|
6 |
+
import datasets
|
7 |
+
import zstandard as zstd
|
8 |
+
from datasets import GeneratorBasedBuilder
|
9 |
+
from datasets.utils import Version
|
10 |
+
from huggingface_hub import snapshot_download
|
11 |
+
|
12 |
+
# The script's file name must match the dataset name in REPO_NAME (e.g. arxiv.py for ".../arxiv").
|
13 |
+
REPO_NAME = "Multi-Domain-Expert-Layers/arxiv"
|
14 |
+
|
15 |
+
class PileDomainDataset(GeneratorBasedBuilder):
    """Dataset builder that reads zstd-compressed JSON Lines shards from the Hub.

    The complete dataset repository named by ``REPO_NAME`` is downloaded with
    ``snapshot_download``; every ``*.jsonl.zst`` shard under ``data/train``,
    ``data/val`` and ``data/test`` is then decompressed and parsed one JSON
    document per line.
    """

    VERSION = Version("1.0.0")

    def _info(self):
        """Return the dataset metadata: a single string feature named ``text``."""
        return datasets.DatasetInfo(
            description="Pile Domain Dataset",
            features=datasets.Features(
                {
                    "text": datasets.Value("string"),
                }
            ),
            supervised_keys=None,
        )

    def _split_generators(self, dl_manager):
        """Download the repository snapshot and declare the available splits.

        Args:
            dl_manager: Download manager supplied by ``datasets``. It is not
                used; the files are fetched with ``snapshot_download`` instead.

        Returns:
            A list of ``datasets.SplitGenerator``. Each one passes a shard
            directory (``data_dir``) and an optional file-name filter
            (``split``) on to ``_generate_examples``.
        """
        dl_path = snapshot_download(repo_id=REPO_NAME, repo_type="dataset")

        train_dir = os.path.join(dl_path, "data/train")
        val_dir = os.path.join(dl_path, "data/val")
        test_dir = os.path.join(dl_path, "data/test")

        return [
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN,
                gen_kwargs={"data_dir": train_dir, "split": None},
            ),
            # "validation" reads every shard in data/val; the two variants
            # below restrict it to shards whose file name contains the tag.
            datasets.SplitGenerator(
                name="validation",
                gen_kwargs={"data_dir": val_dir, "split": None},
            ),
            datasets.SplitGenerator(
                name="validation_pile",
                gen_kwargs={"data_dir": val_dir, "split": "pile"},
            ),
            datasets.SplitGenerator(
                name="validation_domain",
                gen_kwargs={"data_dir": val_dir, "split": "domain"},
            ),
            datasets.SplitGenerator(
                name="test_pile",
                gen_kwargs={"data_dir": test_dir, "split": "pile"},
            ),
            datasets.SplitGenerator(
                name="test_domain",
                gen_kwargs={"data_dir": test_dir, "split": "domain"},
            ),
        ]

    def _generate_examples(self, data_dir, split):
        """Yield ``(key, example)`` pairs from the shards in ``data_dir``.

        Args:
            data_dir: Directory that holds the ``*.jsonl.zst`` shards.
            split: Optional substring a shard's file name must contain
                (for example ``"pile"`` or ``"domain"``). ``None`` selects
                every shard in the directory.

        Yields:
            Tuples of a running integer key, unique across all selected
            shards, and the object decoded from one JSON line.
        """
        dctx = zstd.ZstdDecompressor()

        # glob() returns paths in arbitrary order; sorting keeps the example
        # order, and therefore the keys, reproducible between runs.
        file_paths = sorted(glob(os.path.join(data_dir, "*.jsonl.zst")))
        if split is not None:
            # Match on the file name only, so a download/cache directory that
            # happens to contain the tag cannot select unrelated shards.
            file_paths = [
                path for path in file_paths if split in os.path.basename(path)
            ]

        idx = 0
        for path in file_paths:
            # Closing the BufferedReader also closes the zstd stream reader.
            with open(path, "rb") as raw, io.BufferedReader(
                dctx.stream_reader(raw)
            ) as buffer:
                # Iterate lazily instead of readlines(), which would hold the
                # whole decompressed shard in memory at once.
                for line in buffer:
                    if not line.strip():
                        # Tolerate blank lines rather than crash in json.loads.
                        continue
                    yield idx, json.loads(line)
                    idx += 1
|
86 |
+
|
arxiv/data/test/domain_test_0.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a4ff1b67ee43c266f2ee153b56823e3bbe2b3259a7367d55e245257f9b6526f
|
3 |
+
size 15541310
|
arxiv/data/test/domain_test_1.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7c9adea90a32a01eda9695b1ea90082a04801b25f3b1c5ece9afb55f1cc6488f
|
3 |
+
size 16422435
|
arxiv/data/test/domain_test_2.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:589079fbf6c0c068816e62cf5a69ca6785ea557737dc4b6787c60400f09b02e6
|
3 |
+
size 2331667
|
arxiv/data/test/pile_test_0.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d9fd9239cab4783a1c51b0fe82fc519e3e2a076976d46d5556fd12841e21dcb
|
3 |
+
size 4767643
|
arxiv/data/train/domain_01_0.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3fa7784a6e7ddda976f98c5aa3d2050da992b790bf5e8fd87b4bb9adc805eb36
|
3 |
+
size 16871402
|
arxiv/data/train/domain_01_1.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:df4cf867b9127d7ebb03f1a5130a3effd9fd3373c74ff2b0c37902f2f83bed6a
|
3 |
+
size 15352033
|
arxiv/data/train/domain_01_10.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c74f2e23b77b1f2c00077c93268019e4e5582bf2a3be76877a66c3fb64037f37
|
3 |
+
size 16393690
|
arxiv/data/train/domain_01_11.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b6010834b6472fc68e5bd523c9edc0f658a7204f3123e6fac5e50e9bc621f91c
|
3 |
+
size 15786441
|
arxiv/data/train/domain_01_12.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a51c60f08e2428fe750d69e54104c64c7c69f0507c930e8e419aac7e0688d73
|
3 |
+
size 15756228
|
arxiv/data/train/domain_01_13.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e093e5bb2d254c092e7658a61be8880e1e96b95da97a67ad96061472565c1ad7
|
3 |
+
size 16859851
|
arxiv/data/train/domain_01_14.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:427a16237fcc15575746a7e66d54d7561c1389f62f228365934b45d2ca94ec0f
|
3 |
+
size 17896454
|
arxiv/data/train/domain_01_15.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bdc9c50d29448ed57d188e606347bb4b50bd46a670cd03774f6cb50fc7844fc7
|
3 |
+
size 16069013
|
arxiv/data/train/domain_01_16.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:18af5ffafac3eb9cdb8c11272e8a14dbdd54a20891616bfe405edc0c652e2029
|
3 |
+
size 16498615
|
arxiv/data/train/domain_01_17.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:40228d039aff5bfcebb145047b1cc25a4ca2df6ed1679eae3af7434bb7e007c0
|
3 |
+
size 17044770
|
arxiv/data/train/domain_01_18.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64bf682136e6c27b8f99d3cbbff1e256f8f4a5628ac8e5a2aca637823ea5690d
|
3 |
+
size 15104542
|
arxiv/data/train/domain_01_19.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd6ad38796f194462e155fb9cfad54b6d9e90391dda762dc9642ec44857329d1
|
3 |
+
size 17246074
|
arxiv/data/train/domain_01_2.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6f9323ee5524b55b96cafd887aa8915f508c1cfcddae2648c4fc399f260c3863
|
3 |
+
size 16944159
|
arxiv/data/train/domain_01_20.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b82f1b76d0f8ce4acb4ca51a761b5941af6e29344ca84262bf8f5f689da904aa
|
3 |
+
size 17583685
|
arxiv/data/train/domain_01_21.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2770439adc730e3959cdfc7b8c0dc0fa46483ebc95695bbdc2b779d63cb351f0
|
3 |
+
size 16201201
|
arxiv/data/train/domain_01_22.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fbe81e8bd075be385087cc91b929ed84d6ac5656b453f5aa4da88cc9fef6e231
|
3 |
+
size 15875636
|
arxiv/data/train/domain_01_23.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0bae0734d4c57e5f5eb14e7633f5723fbf814c42a3c6decdc40fec7cdff8919b
|
3 |
+
size 15613038
|
arxiv/data/train/domain_01_24.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e14ac8267c9b9876a0af691071b1100fe69bf41c312e4567655896a243638305
|
3 |
+
size 16030334
|
arxiv/data/train/domain_01_25.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7fefdb886deb0b278d5ac0ae908c139f1f681f43337bed2dc09514c150c592f0
|
3 |
+
size 16306303
|
arxiv/data/train/domain_01_26.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1649a382fb76b0aa99f720aca441e85f0b3fc242f5ce9c02497dd34d8b47727
|
3 |
+
size 16568164
|
arxiv/data/train/domain_01_27.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b524ec3a96c0fef7975dabadcc4fbcea3c12b484b2272494ca1f191b6784d635
|
3 |
+
size 16678421
|
arxiv/data/train/domain_01_28.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e81588a43a28768ef709c9d67598c438058981aacad179da441eb612c4d0973e
|
3 |
+
size 15789654
|
arxiv/data/train/domain_01_29.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e607129ab676eee06f95cb1ec3718b6228626c9d5c87c6a67a8ededaff0db7d3
|
3 |
+
size 16599834
|
arxiv/data/train/domain_01_3.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e2e836f36c40c1a7d6a51b2420d78d34e805b888bf918625afc2077584d50450
|
3 |
+
size 16454989
|
arxiv/data/train/domain_01_30.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b79204a9dfe531e5b9023ec9ffb3b464eaf58731f4faf735f43ed4cb2d8c0dfd
|
3 |
+
size 17781721
|
arxiv/data/train/domain_01_31.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b2f60da479dd18f0000d2adddce34cecf3956ce65bbfdbdbd3652c23d2f81e53
|
3 |
+
size 16272841
|
arxiv/data/train/domain_01_32.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a5986a14b8c7ea4b81aa02e100d717b12df1e43177ebd593ea5a062c5cdeffc7
|
3 |
+
size 16889257
|
arxiv/data/train/domain_01_33.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c8c45ef33a542836c2da4e580090d99c117acb2b9d9512771be1df58fa1a7ddd
|
3 |
+
size 15775554
|
arxiv/data/train/domain_01_34.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fff0b093f0688a15aeebefe36e58f709e1a082a46906e0c850492a7ab6334ac6
|
3 |
+
size 15734091
|
arxiv/data/train/domain_01_35.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:078600339faf3453218f2312756609c2650cc6f9ea2b339d85528c107a9d0888
|
3 |
+
size 14896816
|
arxiv/data/train/domain_01_36.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0937b5959087fada1811a31becfd9df6b2fc1cb51dee59a2541db7ba445aea63
|
3 |
+
size 17199786
|
arxiv/data/train/domain_01_37.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0372cbb8ef785ed60e4cfdaf40428b15d8228027b9009eaa2cd7c6d800dfc996
|
3 |
+
size 16699483
|
arxiv/data/train/domain_01_38.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4a5549fe18fa8d3d1d473cb8af7cf470813bcb84b5a05e5f189ac311a6a2af29
|
3 |
+
size 17512444
|
arxiv/data/train/domain_01_39.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3133fc8c983dd3e8ff5b5dcb581a1dbc131bdf37d639f540921f8bce2fac5c8f
|
3 |
+
size 16086076
|
arxiv/data/train/domain_01_4.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0089611d6243813bbbec15dd74c9149d08356ba1ba044e42937439c780a4158
|
3 |
+
size 16418274
|
arxiv/data/train/domain_01_40.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0854f772f7b9b3da4f89d4ea6f34a5dbae1184f5bbae85b6b6caa112f98e9607
|
3 |
+
size 17721869
|
arxiv/data/train/domain_01_41.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a544003097e158425af89961090c61209ed90cca7ea0c96afbffc79a529f277c
|
3 |
+
size 16088305
|
arxiv/data/train/domain_01_42.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:03dc5e33e2910333ec8f1f2ecf8c68f677f733e033342173dc3088ca6e402682
|
3 |
+
size 17629572
|
arxiv/data/train/domain_01_43.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:36e31c06b0027783ec3e46c1270beadd99b354723b7e23c1a7d4630c002f9bab
|
3 |
+
size 16331519
|
arxiv/data/train/domain_01_44.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:76d1db760a90458505795db09537d8ed70916c0e648726d3eaa05d4c73ec7086
|
3 |
+
size 15958693
|
arxiv/data/train/domain_01_45.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a108f61499b7b94f9deb85ea834b999c89254d158e24682422166431fa9690b
|
3 |
+
size 15578437
|
arxiv/data/train/domain_01_46.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d17b104276159bcff93aa94fbf37946973e67c8391f279f96b98b389934aa803
|
3 |
+
size 16044730
|
arxiv/data/train/domain_01_47.jsonl.zst
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:43388236eb1ef112f15aa20bac18b307d4000d3dd1ce10b892d77e42dd0a164b
|
3 |
+
size 16537221
|