# coding=utf-8
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Loading script for an 11-label text-classification dataset stored as JSON."""

import json

import datasets
# NOTE(review): TextClassification is not referenced anywhere in this script;
# the import is kept so import-time behaviour is unchanged.
from datasets.tasks import TextClassification  # noqa: F401


_TRAIN_DOWNLOAD_URL = "https://raw.kgithub.com/freeziyou/test_data/main/data/train/train.json"
_TEST_DOWNLOAD_URL = "https://raw.kgithub.com/freeziyou/test_data/main/data/test/test.json"


class Data(datasets.GeneratorBasedBuilder):
    """Builder exposing a train and a test split of (text, label) examples."""

    VERSION = datasets.Version("1.0.0")

    def _info(self):
        """Return the dataset metadata: a string ``text`` and a class ``label``."""
        # The order of ``names`` defines the integer id of each class, so it
        # must not be reordered.
        label_names = [
            "none",
            "like",
            "unlike",
            "hope",
            "questioning",
            "express_surprise",
            "normal_interaction",
            "express_sad",
            "tease",
            "meme",
            "express_abashed",
        ]
        return datasets.DatasetInfo(
            description=None,
            features=datasets.Features(
                {
                    "text": datasets.Value("string"),
                    "label": datasets.features.ClassLabel(names=label_names),
                }
            ),
            homepage=None,
            citation=None,
        )

    def _split_generators(self, dl_manager):
        """Returns SplitGenerators.

        Both JSON files are fetched through ``dl_manager`` and the resulting
        local paths are handed to ``_generate_examples`` as ``filepath``.
        """
        train_path = dl_manager.download_and_extract(_TRAIN_DOWNLOAD_URL)
        test_path = dl_manager.download_and_extract(_TEST_DOWNLOAD_URL)
        return [
            datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepath": train_path}),
            datasets.SplitGenerator(name=datasets.Split.TEST, gen_kwargs={"filepath": test_path}),
        ]

    def _generate_examples(self, filepath):
        """Yields examples as (key, example) tuples.

        Args:
            filepath: Path of a UTF-8 JSON file holding the records of one split.

        Yields:
            ``(key, {"text": ..., "label": ...})`` for every record in the file.
        """
        with open(filepath, encoding="utf-8") as f:
            data = json.load(f)

        # NOTE(review): the JSON schema is not visible from this script. The
        # usual layout is an array of {"text", "label"} objects; tuple-unpacking
        # such an object (as the previous loop did) yields its *keys* and then
        # fails on ``row['text']``. Mapping records are therefore keyed by their
        # position, while ``[key, row]`` pair entries keep working as before.
        for index, entry in enumerate(data):
            if isinstance(entry, dict):
                id_, row = index, entry
            else:
                id_, row = entry
            yield id_, {"text": row["text"], "label": row["label"]}