xzyao committed on
Commit
9fb22af
1 Parent(s): a91ef18

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50) hide show
  1. arxiv/.gitattributes +54 -0
  2. arxiv/README.md +3 -0
  3. arxiv/arxiv.py +86 -0
  4. arxiv/data/test/domain_test_0.jsonl.zst +3 -0
  5. arxiv/data/test/domain_test_1.jsonl.zst +3 -0
  6. arxiv/data/test/domain_test_2.jsonl.zst +3 -0
  7. arxiv/data/test/pile_test_0.jsonl.zst +3 -0
  8. arxiv/data/train/domain_01_0.jsonl.zst +3 -0
  9. arxiv/data/train/domain_01_1.jsonl.zst +3 -0
  10. arxiv/data/train/domain_01_10.jsonl.zst +3 -0
  11. arxiv/data/train/domain_01_11.jsonl.zst +3 -0
  12. arxiv/data/train/domain_01_12.jsonl.zst +3 -0
  13. arxiv/data/train/domain_01_13.jsonl.zst +3 -0
  14. arxiv/data/train/domain_01_14.jsonl.zst +3 -0
  15. arxiv/data/train/domain_01_15.jsonl.zst +3 -0
  16. arxiv/data/train/domain_01_16.jsonl.zst +3 -0
  17. arxiv/data/train/domain_01_17.jsonl.zst +3 -0
  18. arxiv/data/train/domain_01_18.jsonl.zst +3 -0
  19. arxiv/data/train/domain_01_19.jsonl.zst +3 -0
  20. arxiv/data/train/domain_01_2.jsonl.zst +3 -0
  21. arxiv/data/train/domain_01_20.jsonl.zst +3 -0
  22. arxiv/data/train/domain_01_21.jsonl.zst +3 -0
  23. arxiv/data/train/domain_01_22.jsonl.zst +3 -0
  24. arxiv/data/train/domain_01_23.jsonl.zst +3 -0
  25. arxiv/data/train/domain_01_24.jsonl.zst +3 -0
  26. arxiv/data/train/domain_01_25.jsonl.zst +3 -0
  27. arxiv/data/train/domain_01_26.jsonl.zst +3 -0
  28. arxiv/data/train/domain_01_27.jsonl.zst +3 -0
  29. arxiv/data/train/domain_01_28.jsonl.zst +3 -0
  30. arxiv/data/train/domain_01_29.jsonl.zst +3 -0
  31. arxiv/data/train/domain_01_3.jsonl.zst +3 -0
  32. arxiv/data/train/domain_01_30.jsonl.zst +3 -0
  33. arxiv/data/train/domain_01_31.jsonl.zst +3 -0
  34. arxiv/data/train/domain_01_32.jsonl.zst +3 -0
  35. arxiv/data/train/domain_01_33.jsonl.zst +3 -0
  36. arxiv/data/train/domain_01_34.jsonl.zst +3 -0
  37. arxiv/data/train/domain_01_35.jsonl.zst +3 -0
  38. arxiv/data/train/domain_01_36.jsonl.zst +3 -0
  39. arxiv/data/train/domain_01_37.jsonl.zst +3 -0
  40. arxiv/data/train/domain_01_38.jsonl.zst +3 -0
  41. arxiv/data/train/domain_01_39.jsonl.zst +3 -0
  42. arxiv/data/train/domain_01_4.jsonl.zst +3 -0
  43. arxiv/data/train/domain_01_40.jsonl.zst +3 -0
  44. arxiv/data/train/domain_01_41.jsonl.zst +3 -0
  45. arxiv/data/train/domain_01_42.jsonl.zst +3 -0
  46. arxiv/data/train/domain_01_43.jsonl.zst +3 -0
  47. arxiv/data/train/domain_01_44.jsonl.zst +3 -0
  48. arxiv/data/train/domain_01_45.jsonl.zst +3 -0
  49. arxiv/data/train/domain_01_46.jsonl.zst +3 -0
  50. arxiv/data/train/domain_01_47.jsonl.zst +3 -0
arxiv/.gitattributes ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.lz4 filter=lfs diff=lfs merge=lfs -text
12
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
13
+ *.model filter=lfs diff=lfs merge=lfs -text
14
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
15
+ *.npy filter=lfs diff=lfs merge=lfs -text
16
+ *.npz filter=lfs diff=lfs merge=lfs -text
17
+ *.onnx filter=lfs diff=lfs merge=lfs -text
18
+ *.ot filter=lfs diff=lfs merge=lfs -text
19
+ *.parquet filter=lfs diff=lfs merge=lfs -text
20
+ *.pb filter=lfs diff=lfs merge=lfs -text
21
+ *.pickle filter=lfs diff=lfs merge=lfs -text
22
+ *.pkl filter=lfs diff=lfs merge=lfs -text
23
+ *.pt filter=lfs diff=lfs merge=lfs -text
24
+ *.pth filter=lfs diff=lfs merge=lfs -text
25
+ *.rar filter=lfs diff=lfs merge=lfs -text
26
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
27
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ # Audio files - uncompressed
37
+ *.pcm filter=lfs diff=lfs merge=lfs -text
38
+ *.sam filter=lfs diff=lfs merge=lfs -text
39
+ *.raw filter=lfs diff=lfs merge=lfs -text
40
+ # Audio files - compressed
41
+ *.aac filter=lfs diff=lfs merge=lfs -text
42
+ *.flac filter=lfs diff=lfs merge=lfs -text
43
+ *.mp3 filter=lfs diff=lfs merge=lfs -text
44
+ *.ogg filter=lfs diff=lfs merge=lfs -text
45
+ *.wav filter=lfs diff=lfs merge=lfs -text
46
+ # Image files - uncompressed
47
+ *.bmp filter=lfs diff=lfs merge=lfs -text
48
+ *.gif filter=lfs diff=lfs merge=lfs -text
49
+ *.png filter=lfs diff=lfs merge=lfs -text
50
+ *.tiff filter=lfs diff=lfs merge=lfs -text
51
+ # Image files - compressed
52
+ *.jpg filter=lfs diff=lfs merge=lfs -text
53
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
54
+ *.webp filter=lfs diff=lfs merge=lfs -text
arxiv/README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ ---
arxiv/arxiv.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import json
3
+ import os
4
+ from glob import glob
5
+
6
+ import datasets
7
+ import zstandard as zstd
8
+ from datasets import GeneratorBasedBuilder
9
+ from datasets.utils import Version
10
+ from huggingface_hub import snapshot_download
11
+
12
+ # Requires REPO_NAME and file name to be same e.g. uspto.py
13
+ REPO_NAME = "Multi-Domain-Expert-Layers/arxiv"
14
+
15
+ class PileDomainDataset(GeneratorBasedBuilder):
16
+ VERSION = Version("1.0.0")
17
+
18
+ def _info(self):
19
+ return datasets.DatasetInfo(
20
+ description="Pile Domain Dataset",
21
+ features=datasets.Features(
22
+ {
23
+ "text": datasets.Value("string"),
24
+ }
25
+ ),
26
+ supervised_keys=None,
27
+ )
28
+
29
+ def _split_generators(self, dl_manager):
30
+
31
+ dl_path = snapshot_download(repo_id=REPO_NAME, repo_type="dataset")
32
+
33
+ return [
34
+ datasets.SplitGenerator(
35
+ name=datasets.Split.TRAIN,
36
+ gen_kwargs={
37
+ "data_dir": os.path.join(dl_path, "data/train"),
38
+ "split": None,
39
+ },
40
+ ),
41
+ datasets.SplitGenerator(
42
+ name="validation",
43
+ gen_kwargs={
44
+ "data_dir": os.path.join(dl_path, "data/val"),
45
+ "split": None,
46
+ },
47
+ ),
48
+ datasets.SplitGenerator(
49
+ name="validation_pile",
50
+ gen_kwargs={
51
+ "data_dir": os.path.join(dl_path, "data/val"),
52
+ "split": "pile",
53
+ },
54
+ ),
55
+ datasets.SplitGenerator(
56
+ name="validation_domain",
57
+ gen_kwargs={
58
+ "data_dir": os.path.join(dl_path, "data/val"),
59
+ "split": "domain",
60
+ },
61
+ ),
62
+ datasets.SplitGenerator(
63
+ name="test_pile",
64
+ gen_kwargs={"data_dir": os.path.join(dl_path, "data/test"), "split": "pile"},
65
+ ),
66
+ datasets.SplitGenerator(
67
+ name="test_domain",
68
+ gen_kwargs={"data_dir": os.path.join(dl_path, "data/test"), "split": "domain"},
69
+ ),
70
+ ]
71
+
72
+ def _generate_examples(self, data_dir, split):
73
+ dctx = zstd.ZstdDecompressor()
74
+ idx = -1
75
+ file_paths = glob(os.path.join(data_dir, f"*.jsonl.zst"))
76
+ if split is not None:
77
+ file_paths = [f for f in file_paths if split in f]
78
+ for file in file_paths:
79
+ with open(file, "rb") as f:
80
+ reader = dctx.stream_reader(f)
81
+ buffer = io.BufferedReader(reader)
82
+ for _, line in enumerate(buffer.readlines()):
83
+ data = json.loads(line)
84
+ idx += 1
85
+ yield idx, data
86
+
arxiv/data/test/domain_test_0.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a4ff1b67ee43c266f2ee153b56823e3bbe2b3259a7367d55e245257f9b6526f
3
+ size 15541310
arxiv/data/test/domain_test_1.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c9adea90a32a01eda9695b1ea90082a04801b25f3b1c5ece9afb55f1cc6488f
3
+ size 16422435
arxiv/data/test/domain_test_2.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:589079fbf6c0c068816e62cf5a69ca6785ea557737dc4b6787c60400f09b02e6
3
+ size 2331667
arxiv/data/test/pile_test_0.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d9fd9239cab4783a1c51b0fe82fc519e3e2a076976d46d5556fd12841e21dcb
3
+ size 4767643
arxiv/data/train/domain_01_0.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fa7784a6e7ddda976f98c5aa3d2050da992b790bf5e8fd87b4bb9adc805eb36
3
+ size 16871402
arxiv/data/train/domain_01_1.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df4cf867b9127d7ebb03f1a5130a3effd9fd3373c74ff2b0c37902f2f83bed6a
3
+ size 15352033
arxiv/data/train/domain_01_10.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c74f2e23b77b1f2c00077c93268019e4e5582bf2a3be76877a66c3fb64037f37
3
+ size 16393690
arxiv/data/train/domain_01_11.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6010834b6472fc68e5bd523c9edc0f658a7204f3123e6fac5e50e9bc621f91c
3
+ size 15786441
arxiv/data/train/domain_01_12.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a51c60f08e2428fe750d69e54104c64c7c69f0507c930e8e419aac7e0688d73
3
+ size 15756228
arxiv/data/train/domain_01_13.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e093e5bb2d254c092e7658a61be8880e1e96b95da97a67ad96061472565c1ad7
3
+ size 16859851
arxiv/data/train/domain_01_14.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:427a16237fcc15575746a7e66d54d7561c1389f62f228365934b45d2ca94ec0f
3
+ size 17896454
arxiv/data/train/domain_01_15.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdc9c50d29448ed57d188e606347bb4b50bd46a670cd03774f6cb50fc7844fc7
3
+ size 16069013
arxiv/data/train/domain_01_16.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18af5ffafac3eb9cdb8c11272e8a14dbdd54a20891616bfe405edc0c652e2029
3
+ size 16498615
arxiv/data/train/domain_01_17.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40228d039aff5bfcebb145047b1cc25a4ca2df6ed1679eae3af7434bb7e007c0
3
+ size 17044770
arxiv/data/train/domain_01_18.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64bf682136e6c27b8f99d3cbbff1e256f8f4a5628ac8e5a2aca637823ea5690d
3
+ size 15104542
arxiv/data/train/domain_01_19.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd6ad38796f194462e155fb9cfad54b6d9e90391dda762dc9642ec44857329d1
3
+ size 17246074
arxiv/data/train/domain_01_2.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f9323ee5524b55b96cafd887aa8915f508c1cfcddae2648c4fc399f260c3863
3
+ size 16944159
arxiv/data/train/domain_01_20.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b82f1b76d0f8ce4acb4ca51a761b5941af6e29344ca84262bf8f5f689da904aa
3
+ size 17583685
arxiv/data/train/domain_01_21.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2770439adc730e3959cdfc7b8c0dc0fa46483ebc95695bbdc2b779d63cb351f0
3
+ size 16201201
arxiv/data/train/domain_01_22.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbe81e8bd075be385087cc91b929ed84d6ac5656b453f5aa4da88cc9fef6e231
3
+ size 15875636
arxiv/data/train/domain_01_23.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bae0734d4c57e5f5eb14e7633f5723fbf814c42a3c6decdc40fec7cdff8919b
3
+ size 15613038
arxiv/data/train/domain_01_24.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e14ac8267c9b9876a0af691071b1100fe69bf41c312e4567655896a243638305
3
+ size 16030334
arxiv/data/train/domain_01_25.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fefdb886deb0b278d5ac0ae908c139f1f681f43337bed2dc09514c150c592f0
3
+ size 16306303
arxiv/data/train/domain_01_26.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1649a382fb76b0aa99f720aca441e85f0b3fc242f5ce9c02497dd34d8b47727
3
+ size 16568164
arxiv/data/train/domain_01_27.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b524ec3a96c0fef7975dabadcc4fbcea3c12b484b2272494ca1f191b6784d635
3
+ size 16678421
arxiv/data/train/domain_01_28.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e81588a43a28768ef709c9d67598c438058981aacad179da441eb612c4d0973e
3
+ size 15789654
arxiv/data/train/domain_01_29.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e607129ab676eee06f95cb1ec3718b6228626c9d5c87c6a67a8ededaff0db7d3
3
+ size 16599834
arxiv/data/train/domain_01_3.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2e836f36c40c1a7d6a51b2420d78d34e805b888bf918625afc2077584d50450
3
+ size 16454989
arxiv/data/train/domain_01_30.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b79204a9dfe531e5b9023ec9ffb3b464eaf58731f4faf735f43ed4cb2d8c0dfd
3
+ size 17781721
arxiv/data/train/domain_01_31.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2f60da479dd18f0000d2adddce34cecf3956ce65bbfdbdbd3652c23d2f81e53
3
+ size 16272841
arxiv/data/train/domain_01_32.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5986a14b8c7ea4b81aa02e100d717b12df1e43177ebd593ea5a062c5cdeffc7
3
+ size 16889257
arxiv/data/train/domain_01_33.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8c45ef33a542836c2da4e580090d99c117acb2b9d9512771be1df58fa1a7ddd
3
+ size 15775554
arxiv/data/train/domain_01_34.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fff0b093f0688a15aeebefe36e58f709e1a082a46906e0c850492a7ab6334ac6
3
+ size 15734091
arxiv/data/train/domain_01_35.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:078600339faf3453218f2312756609c2650cc6f9ea2b339d85528c107a9d0888
3
+ size 14896816
arxiv/data/train/domain_01_36.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0937b5959087fada1811a31becfd9df6b2fc1cb51dee59a2541db7ba445aea63
3
+ size 17199786
arxiv/data/train/domain_01_37.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0372cbb8ef785ed60e4cfdaf40428b15d8228027b9009eaa2cd7c6d800dfc996
3
+ size 16699483
arxiv/data/train/domain_01_38.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a5549fe18fa8d3d1d473cb8af7cf470813bcb84b5a05e5f189ac311a6a2af29
3
+ size 17512444
arxiv/data/train/domain_01_39.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3133fc8c983dd3e8ff5b5dcb581a1dbc131bdf37d639f540921f8bce2fac5c8f
3
+ size 16086076
arxiv/data/train/domain_01_4.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0089611d6243813bbbec15dd74c9149d08356ba1ba044e42937439c780a4158
3
+ size 16418274
arxiv/data/train/domain_01_40.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0854f772f7b9b3da4f89d4ea6f34a5dbae1184f5bbae85b6b6caa112f98e9607
3
+ size 17721869
arxiv/data/train/domain_01_41.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a544003097e158425af89961090c61209ed90cca7ea0c96afbffc79a529f277c
3
+ size 16088305
arxiv/data/train/domain_01_42.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03dc5e33e2910333ec8f1f2ecf8c68f677f733e033342173dc3088ca6e402682
3
+ size 17629572
arxiv/data/train/domain_01_43.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36e31c06b0027783ec3e46c1270beadd99b354723b7e23c1a7d4630c002f9bab
3
+ size 16331519
arxiv/data/train/domain_01_44.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76d1db760a90458505795db09537d8ed70916c0e648726d3eaa05d4c73ec7086
3
+ size 15958693
arxiv/data/train/domain_01_45.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a108f61499b7b94f9deb85ea834b999c89254d158e24682422166431fa9690b
3
+ size 15578437
arxiv/data/train/domain_01_46.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d17b104276159bcff93aa94fbf37946973e67c8391f279f96b98b389934aa803
3
+ size 16044730
arxiv/data/train/domain_01_47.jsonl.zst ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43388236eb1ef112f15aa20bac18b307d4000d3dd1ce10b892d77e42dd0a164b
3
+ size 16537221