Delete wikiTokenisedValid
Browse files
wikiTokenisedValid/data-00000-of-00001.arrow
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:1757fade9c380370679721c0adebfa70a535cfe7fcf73950b3b921616d6f97bd
|
3 |
-
size 2458880
|
|
|
|
|
|
|
|
wikiTokenisedValid/dataset_info.json
DELETED
@@ -1,75 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"builder_name": "wikitext",
|
3 |
-
"citation": "@misc{merity2016pointer,\n title={Pointer Sentinel Mixture Models},\n author={Stephen Merity and Caiming Xiong and James Bradbury and Richard Socher},\n year={2016},\n eprint={1609.07843},\n archivePrefix={arXiv},\n primaryClass={cs.CL}\n}\n",
|
4 |
-
"config_name": "wikitext-103-raw-v1",
|
5 |
-
"dataset_name": "wikitext",
|
6 |
-
"dataset_size": 548965325,
|
7 |
-
"description": " The WikiText language modeling dataset is a collection of over 100 million tokens extracted from the set of verified\n Good and Featured articles on Wikipedia. The dataset is available under the Creative Commons Attribution-ShareAlike\n License.\n",
|
8 |
-
"download_checksums": {
|
9 |
-
"https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-raw-v1.zip": {
|
10 |
-
"num_bytes": 191984949,
|
11 |
-
"checksum": null
|
12 |
-
}
|
13 |
-
},
|
14 |
-
"download_size": 191984949,
|
15 |
-
"features": {
|
16 |
-
"input_ids": {
|
17 |
-
"feature": {
|
18 |
-
"dtype": "int32",
|
19 |
-
"_type": "Value"
|
20 |
-
},
|
21 |
-
"_type": "Sequence"
|
22 |
-
},
|
23 |
-
"token_type_ids": {
|
24 |
-
"feature": {
|
25 |
-
"dtype": "int8",
|
26 |
-
"_type": "Value"
|
27 |
-
},
|
28 |
-
"_type": "Sequence"
|
29 |
-
},
|
30 |
-
"attention_mask": {
|
31 |
-
"feature": {
|
32 |
-
"dtype": "int8",
|
33 |
-
"_type": "Value"
|
34 |
-
},
|
35 |
-
"_type": "Sequence"
|
36 |
-
},
|
37 |
-
"next_sentence_label": {
|
38 |
-
"dtype": "int64",
|
39 |
-
"_type": "Value"
|
40 |
-
}
|
41 |
-
},
|
42 |
-
"homepage": "https://blog.einstein.ai/the-wikitext-long-term-dependency-language-modeling-dataset/",
|
43 |
-
"license": "Creative Commons Attribution-ShareAlike 4.0 International (CC BY-SA 4.0)",
|
44 |
-
"size_in_bytes": 740950274,
|
45 |
-
"splits": {
|
46 |
-
"test": {
|
47 |
-
"name": "test",
|
48 |
-
"num_bytes": 1305088,
|
49 |
-
"num_examples": 4358,
|
50 |
-
"dataset_name": "wikitext"
|
51 |
-
},
|
52 |
-
"train": {
|
53 |
-
"name": "train",
|
54 |
-
"num_bytes": 546500949,
|
55 |
-
"num_examples": 1801350,
|
56 |
-
"shard_lengths": [
|
57 |
-
1649000,
|
58 |
-
152350
|
59 |
-
],
|
60 |
-
"dataset_name": "wikitext"
|
61 |
-
},
|
62 |
-
"validation": {
|
63 |
-
"name": "validation",
|
64 |
-
"num_bytes": 1159288,
|
65 |
-
"num_examples": 3760,
|
66 |
-
"dataset_name": "wikitext"
|
67 |
-
}
|
68 |
-
},
|
69 |
-
"version": {
|
70 |
-
"version_str": "1.0.0",
|
71 |
-
"major": 1,
|
72 |
-
"minor": 0,
|
73 |
-
"patch": 0
|
74 |
-
}
|
75 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
wikiTokenisedValid/readme.md
DELETED
File without changes
|
wikiTokenisedValid/state.json
DELETED
@@ -1,13 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"_data_files": [
|
3 |
-
{
|
4 |
-
"filename": "data-00000-of-00001.arrow"
|
5 |
-
}
|
6 |
-
],
|
7 |
-
"_fingerprint": "faf39447b7393dbe",
|
8 |
-
"_format_columns": null,
|
9 |
-
"_format_kwargs": {},
|
10 |
-
"_format_type": null,
|
11 |
-
"_output_all_columns": false,
|
12 |
-
"_split": "validation"
|
13 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|