File size: 2,183 Bytes
aeb12b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
{
  "builder_name": "imdb",
  "citation": "@InProceedings{maas-EtAl:2011:ACL-HLT2011,\n  author    = {Maas, Andrew L.  and  Daly, Raymond E.  and  Pham, Peter T.  and  Huang, Dan  and  Ng, Andrew Y.  and  Potts, Christopher},\n  title     = {Learning Word Vectors for Sentiment Analysis},\n  booktitle = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies},\n  month     = {June},\n  year      = {2011},\n  address   = {Portland, Oregon, USA},\n  publisher = {Association for Computational Linguistics},\n  pages     = {142--150},\n  url       = {http://www.aclweb.org/anthology/P11-1015}\n}\n",
  "config_name": "plain_text",
  "dataset_size": 133190302,
  "description": "Large Movie Review Dataset.\nThis is a dataset for binary sentiment classification containing substantially more data than previous benchmark datasets. We provide a set of 25,000 highly polar movie reviews for training, and 25,000 for testing. There is additional unlabeled data for use as well.",
  "download_checksums": {
    "https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz": {
      "num_bytes": 84125825,
      "checksum": null
    }
  },
  "download_size": 84125825,
  "features": {
    "text": {
      "dtype": "string",
      "_type": "Value"
    },
    "label": {
      "names": [
        "neg",
        "pos"
      ],
      "_type": "ClassLabel"
    }
  },
  "homepage": "http://ai.stanford.edu/~amaas/data/sentiment/",
  "license": "",
  "size_in_bytes": 217316127,
  "splits": {
    "train": {
      "name": "train",
      "num_bytes": 33432823,
      "num_examples": 25000,
      "dataset_name": "imdb"
    },
    "test": {
      "name": "test",
      "num_bytes": 32650685,
      "num_examples": 25000,
      "dataset_name": "imdb"
    },
    "unsupervised": {
      "name": "unsupervised",
      "num_bytes": 67106794,
      "num_examples": 50000,
      "dataset_name": "imdb"
    }
  },
  "task_templates": [
    {
      "task": "text-classification",
      "label_column": "label"
    }
  ],
  "version": {
    "version_str": "1.0.0",
    "description": "",
    "major": 1,
    "minor": 0,
    "patch": 0
  }
}