lakshmi97 committed
Commit b2659cd
1 Parent(s): 07c0de9

Delete wikiTokenisedValid

wikiTokenisedValid/data-00000-of-00001.arrow DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:1757fade9c380370679721c0adebfa70a535cfe7fcf73950b3b921616d6f97bd
- size 2458880
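For context: the deleted `.arrow` file was tracked through Git LFS, so the repository itself only held the pointer above (spec version, sha256 oid, byte size), not the 2,458,880-byte Arrow payload. A minimal sketch of verifying a locally downloaded copy against that pointer; the local path is an assumption for illustration:

```python
import hashlib

def verify_lfs_pointer(path: str, oid_hex: str, size: int) -> bool:
    """Check a local file against a Git LFS pointer's sha256 oid and size."""
    h = hashlib.sha256()
    n = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
            n += len(chunk)
    return h.hexdigest() == oid_hex and n == size

# oid and size copied from the deleted pointer file above;
# the path assumes the blob was already fetched (e.g. via `git lfs pull`).
ok = verify_lfs_pointer(
    "wikiTokenisedValid/data-00000-of-00001.arrow",
    "1757fade9c380370679721c0adebfa70a535cfe7fcf73950b3b921616d6f97bd",
    2458880,
)
```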
wikiTokenisedValid/dataset_info.json DELETED
@@ -1,75 +0,0 @@
- {
-   "builder_name": "wikitext",
-   "citation": "@misc{merity2016pointer,\n title={Pointer Sentinel Mixture Models},\n author={Stephen Merity and Caiming Xiong and James Bradbury and Richard Socher},\n year={2016},\n eprint={1609.07843},\n archivePrefix={arXiv},\n primaryClass={cs.CL}\n}\n",
-   "config_name": "wikitext-103-raw-v1",
-   "dataset_name": "wikitext",
-   "dataset_size": 548965325,
-   "description": " The WikiText language modeling dataset is a collection of over 100 million tokens extracted from the set of verified\n Good and Featured articles on Wikipedia. The dataset is available under the Creative Commons Attribution-ShareAlike\n License.\n",
-   "download_checksums": {
-     "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-raw-v1.zip": {
-       "num_bytes": 191984949,
-       "checksum": null
-     }
-   },
-   "download_size": 191984949,
-   "features": {
-     "input_ids": {
-       "feature": {
-         "dtype": "int32",
-         "_type": "Value"
-       },
-       "_type": "Sequence"
-     },
-     "token_type_ids": {
-       "feature": {
-         "dtype": "int8",
-         "_type": "Value"
-       },
-       "_type": "Sequence"
-     },
-     "attention_mask": {
-       "feature": {
-         "dtype": "int8",
-         "_type": "Value"
-       },
-       "_type": "Sequence"
-     },
-     "next_sentence_label": {
-       "dtype": "int64",
-       "_type": "Value"
-     }
-   },
-   "homepage": "https://blog.einstein.ai/the-wikitext-long-term-dependency-language-modeling-dataset/",
-   "license": "Creative Commons Attribution-ShareAlike 4.0 International (CC BY-SA 4.0)",
-   "size_in_bytes": 740950274,
-   "splits": {
-     "test": {
-       "name": "test",
-       "num_bytes": 1305088,
-       "num_examples": 4358,
-       "dataset_name": "wikitext"
-     },
-     "train": {
-       "name": "train",
-       "num_bytes": 546500949,
-       "num_examples": 1801350,
-       "shard_lengths": [
-         1649000,
-         152350
-       ],
-       "dataset_name": "wikitext"
-     },
-     "validation": {
-       "name": "validation",
-       "num_bytes": 1159288,
-       "num_examples": 3760,
-       "dataset_name": "wikitext"
-     }
-   },
-   "version": {
-     "version_str": "1.0.0",
-     "major": 1,
-     "minor": 0,
-     "patch": 0
-   }
- }
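The feature schema above (`input_ids` as int32, `token_type_ids` and `attention_mask` as int8, `next_sentence_label` as int64) matches BERT-style pretraining preprocessing of the wikitext-103-raw-v1 validation split, whose 3,760 examples line up with the raw split. A hedged sketch of how such a directory might have been produced; the tokenizer checkpoint, sequence length, and NSP labelling below are assumptions, not taken from this commit:

```python
from datasets import load_dataset
from transformers import BertTokenizerFast

# Assumed tokenizer; the commit does not record which checkpoint was used.
tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")
raw = load_dataset("wikitext", "wikitext-103-raw-v1", split="validation")

def tokenize(batch):
    # BERT tokenizers emit input_ids, token_type_ids, and attention_mask,
    # matching three of the four features in dataset_info.json.
    enc = tokenizer(batch["text"], truncation=True, max_length=128)
    # Placeholder NSP label; a real pipeline would pair sentences and label
    # whether the second sentence actually follows the first.
    enc["next_sentence_label"] = [0] * len(batch["text"])
    return enc

tokenised_valid = raw.map(tokenize, batched=True, remove_columns=["text"])
# save_to_disk writes exactly the files deleted in this commit:
# data-00000-of-00001.arrow, dataset_info.json, and state.json.
tokenised_valid.save_to_disk("wikiTokenisedValid")
```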
wikiTokenisedValid/readme.md DELETED
File without changes
wikiTokenisedValid/state.json DELETED
@@ -1,13 +0,0 @@
- {
-   "_data_files": [
-     {
-       "filename": "data-00000-of-00001.arrow"
-     }
-   ],
-   "_fingerprint": "faf39447b7393dbe",
-   "_format_columns": null,
-   "_format_kwargs": {},
-   "_format_type": null,
-   "_output_all_columns": false,
-   "_split": "validation"
- }
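Taken together, `data-00000-of-00001.arrow`, `dataset_info.json`, and `state.json` are the on-disk layout written by Hugging Face Datasets' `Dataset.save_to_disk`, as the `_data_files` and `_fingerprint` fields in `state.json` indicate. Before this deletion, the directory could be reloaded directly:

```python
from datasets import load_from_disk

# Reload the saved validation split from the directory this commit removes.
ds = load_from_disk("wikiTokenisedValid")
print(ds)                       # Dataset({features: ['input_ids', ...], num_rows: 3760})
print(ds[0]["input_ids"][:10])  # first ten token ids of the first example
```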