dd123 committed on
Commit
0111237
1 Parent(s): 9fd203c

Upload data.py

Browse files
Files changed (1) hide show
  1. logs/data.py +69 -0
logs/data.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Loading script for a text classification dataset with `text` and `label` features (11 label classes)."""
16
+
17
+ import json
18
+
19
+ import datasets
20
+ from datasets.tasks import TextClassification
21
+
22
+ _TRAIN_DOWNLOAD_URL = "https://raw.kgithub.com/freeziyou/test_data/main/data/train/train.json"  # JSON file downloaded for the TRAIN split
23
+ _TEST_DOWNLOAD_URL = "https://raw.kgithub.com/freeziyou/test_data/main/data/test/test.json"  # JSON file downloaded for the TEST split
24
+
25
+
26
class Data(datasets.GeneratorBasedBuilder):
    """Dataset builder for a single-label text classification corpus.

    Every example has a ``text`` string and a ``label`` taken from the eleven
    class names declared in ``_info``. The train and test splits are read from
    the JSON files at ``_TRAIN_DOWNLOAD_URL`` and ``_TEST_DOWNLOAD_URL``.
    """

    VERSION = datasets.Version("1.0.0")

    def _info(self):
        """Return the dataset metadata: a string ``text`` feature and a ``label`` class feature."""
        return datasets.DatasetInfo(
            description=None,
            features=datasets.Features(
                {
                    "text": datasets.Value("string"),
                    # The order of the names fixes the integer id of each class.
                    "label": datasets.features.ClassLabel(
                        names=[
                            "none",
                            "like",
                            "unlike",
                            "hope",
                            "questioning",
                            "express_surprise",
                            "normal_interaction",
                            "express_sad",
                            "tease",
                            "meme",
                            "express_abashed",
                        ]
                    ),
                }
            ),
            homepage=None,
            citation=None,
        )

    def _split_generators(self, dl_manager):
        """Download both JSON files and return the train and test SplitGenerators."""
        train_path = dl_manager.download_and_extract(_TRAIN_DOWNLOAD_URL)
        test_path = dl_manager.download_and_extract(_TEST_DOWNLOAD_URL)
        return [
            datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepath": train_path}),
            datasets.SplitGenerator(name=datasets.Split.TEST, gen_kwargs={"filepath": test_path}),
        ]

    def _generate_examples(self, filepath):
        """Yield ``(key, example)`` tuples read from the JSON file at ``filepath``.

        The top-level JSON value is iterated element by element. Each element
        may be either:

        * a record object with ``"text"`` and ``"label"`` entries, which is
          keyed by its position in the file; or
        * a two-item ``[key, record]`` pair, in which case the supplied key
          is used as-is.

        Args:
            filepath: Path of the downloaded JSON file for one split.

        Yields:
            ``(key, {"text": ..., "label": ...})`` for every element.
        """
        # Read everything up front so the file is closed before the first yield.
        with open(filepath, encoding="utf-8") as f:
            data = json.load(f)

        for index, item in enumerate(data):
            if isinstance(item, dict):
                # Plain record: unpacking it as a pair would bind its *keys*,
                # so use the running index as the example key instead.
                id_, row = index, item
            else:
                # Explicit [key, record] pair.
                id_, row = item
            yield id_, {"text": row["text"], "label": row["label"]}