Spaces:
Sleeping
Sleeping
File size: 3,448 Bytes
6b2dcd4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
# -*- coding: utf-8 -*-
# Copyright (c) Louis Brulé Naudet. All Rights Reserved.
# This software may be used and distributed according to the terms of the License Agreement.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import datasets
import polars as pl
class Dataset:
    """
    Static helpers for loading, saving and converting Hugging Face datasets.

    Notes
    -----
    The class holds no state; every method is a ``@staticmethod`` and can be
    called directly on the class (``Dataset.load(...)``).
    """

    @staticmethod
    def load(
        dataset_path: str
    ):
        """
        Load a dataset from disk.

        Parameters
        ----------
        dataset_path : str
            The path to the dataset on disk.

        Returns
        -------
        datasets.Dataset or datasets.DatasetDict
            The loaded object, exactly as returned by
            ``datasets.load_from_disk``.

        Notes
        -----
        This method delegates to the `load_from_disk` function provided by
        the `datasets` module. The directory is expected to have been
        written by the matching `save_to_disk` method of that library.

        Example
        -------
        >>> dataset_path = "/path/to/dataset"
        >>> dataset = Dataset.load(dataset_path)
        """
        return datasets.load_from_disk(dataset_path=dataset_path)

    @staticmethod
    def save(
        dataset: datasets.Dataset,
        dataset_path: str
    ) -> None:
        """
        Save a dataset to disk.

        Parameters
        ----------
        dataset : datasets.Dataset
            The dataset to be saved.

        dataset_path : str
            The path where the dataset will be saved on disk.

        Returns
        -------
        None

        Notes
        -----
        `save_to_disk` is a method of the dataset object itself; the
        `datasets` package does not expose a module-level `save_to_disk`
        function, so the call is made on `dataset`.

        Example
        -------
        >>> dataset = datasets.load_dataset("my_dataset", split="train")
        >>> dataset_path = "/path/to/save/dataset"
        >>> Dataset.save(dataset, dataset_path)
        """
        dataset.save_to_disk(dataset_path)

    @staticmethod
    def convert_to_polars(
        dataset: datasets.Dataset
    ) -> pl.DataFrame:
        """
        Convert a dataset to a Polars DataFrame with a leading row index.

        Parameters
        ----------
        dataset : datasets.Dataset
            The dataset to be converted to a Polars DataFrame.

        Returns
        -------
        pl.DataFrame
            A Polars DataFrame built from the Arrow table backing the
            dataset, with an extra row-number column named ``"index"``.

        Notes
        -----
        The Arrow table is read from ``dataset.data.table`` and handed to
        ``pl.from_arrow``. Recent Polars releases provide
        ``DataFrame.with_row_index`` (whose default column name is
        ``"index"``); older releases only have ``with_row_count``, which is
        called with ``name="index"`` so both paths yield the same column.

        Raises
        ------
        Exception
            Any error raised by Polars or Arrow during the conversion is
            propagated unchanged to the caller.

        Examples
        --------
        >>> dataset = datasets.Dataset.from_dict({"text": ["a", "b"]})
        >>> dataframe = Dataset.convert_to_polars(dataset)
        """
        frame = pl.from_arrow(dataset.data.table)

        # Feature-detect the API rather than catching every exception: the
        # only expected difference between Polars versions is whether
        # `with_row_index` exists.
        if hasattr(frame, "with_row_index"):
            return frame.with_row_index()

        return frame.with_row_count(name="index")
|