mdevoz
/

tanadata

+import json
+import datasets
+# Free-text metadata returned by TanaData._info(); you can update these
+# with more detailed information.
+# Human-readable summary, passed as DatasetInfo(description=...).
+_DESCRIPTION = """
+TanaData is a custom dataset for instruction-response tasks.
+"""
+# BibTeX entry, passed as DatasetInfo(citation=...).
+_CITATION = """
+@misc{tanadata2025,
+  title={TanaData Dataset},
+  year={2025},
+  note={Custom dataset hosted on Hugging Face}
+}
+"""
+class TanaData(datasets.GeneratorBasedBuilder):
+    VERSION = datasets.Version("1.0.0")
+    def _info(self):
+        return datasets.DatasetInfo(
+            description=_DESCRIPTION,
+            features=datasets.Features({
+                "instruction": datasets.Value("string"),
+                "input": datasets.Value("string"),
+                "output": datasets.Value("string"),
+            }),
+            supervised_keys=None,
+            homepage="https://huggingface.co/mdevoz/tanadata",
+            citation=_CITATION,
+        )
+    def _split_generators(self, dl_manager):
+        # This URL points to your JSON file in the repository.
+        file_path = dl_manager.download_and_extract(
+            "https://huggingface.co/mdevoz/tanadata/resolve/main/tana_z.json"
+        )
+        return [
+            datasets.SplitGenerator(
+                name=datasets.Split.TRAIN,
+                gen_kwargs={"filepath": file_path}
+            )
+        ]
+    def _generate_examples(self, filepath):
+        # Adjust this logic based on your JSON file structure.
+        with open(filepath, encoding="utf-8") as f:
+            # If your file is a JSON array of examples:
+            data = json.load(f)
+            for idx, example in enumerate(data):
+                yield idx, example
+# For testing, you can uncomment the following lines locally:
+# if __name__ == "__main__":
+#     from datasets import load_dataset
+#     dataset = load_dataset(__file__, name="tanadata")
+#     print(dataset)