Spaces:
Runtime error
Runtime error
import pandas as pd | |
import numpy as np | |
from datasets import load_dataset | |
def convert_embeddings(embeddings_str):
    """Parse stringified embedding vectors into NumPy arrays.

    Each item is expected to be the text form of a 1-D numeric vector,
    e.g. ``"[0.1 0.2 0.3]"`` (whitespace separated) or ``"[0.1, 0.2, 0.3]"``
    (comma separated).

    Args:
        embeddings_str: Iterable of strings, one serialized vector per item.

    Returns:
        A list with one ``numpy.ndarray`` per input string, in input order.
    """

    def _parse(raw):
        # Strip surrounding whitespace first, then remove the enclosing
        # brackets only if they are actually there. Blindly slicing [1:-1]
        # would eat real digits when the brackets are missing or padded.
        body = raw.strip().strip('[]')
        # Treat commas as plain separators so both "1 2 3" and "1, 2, 3" work;
        # a space in `sep` matches any run of whitespace.
        return np.fromstring(body.replace(',', ' '), sep=' ')

    return [_parse(item) for item in embeddings_str]
class DatasetLoader:
    """Namespace of helpers that load datasets from CSV files or the HF hub.

    All loaders are static: they take no instance state and can be called
    either on the class (``DatasetLoader.load_from_file(...)``) or on an
    instance.
    """

    @staticmethod
    def load_from_file(path: str, to_pandas: bool = False):
        """Load a CSV file.

        Args:
            path: Path to the CSV file.
            to_pandas: When true, read the file with ``pandas.read_csv``.

        Returns:
            A ``pandas.DataFrame`` if ``to_pandas`` is true, otherwise the
            ``'train'`` split produced by ``load_dataset("csv", ...)``.
        """
        if to_pandas:
            return pd.read_csv(path)
        return load_dataset("csv", data_files=path)['train']

    @staticmethod
    def load_from_file_with_embeddings(path: str, to_pandas: bool = False):
        """Load a CSV file and decode its ``'embeddings'`` column.

        The file is loaded through ``load_dataset("csv", ...)`` and the
        ``'embeddings'`` column is passed, batch by batch, through
        ``convert_embeddings`` to turn the stored strings into numeric
        vectors.

        Args:
            path: Path to a CSV file containing an ``'embeddings'`` column.
            to_pandas: When true, convert the mapped dataset with
                ``to_pandas()`` before returning it.

        Returns:
            The mapped ``'train'`` split, or the result of its
            ``to_pandas()`` call if ``to_pandas`` is true.
        """
        dataset = load_dataset("csv", data_files=path)['train']
        # batched=True hands the lambda a whole column slice (a list of
        # strings), which is the shape convert_embeddings expects.
        decoded = dataset.map(
            lambda batch: {
                'embeddings': convert_embeddings(batch['embeddings'])
            },
            batched=True,
        )
        if to_pandas:
            return decoded.to_pandas()
        return decoded

    @staticmethod
    def load_from_huggingface(dataset_name: str, split: str, to_pandas: bool = False):
        """Load one split of a dataset by name via ``load_dataset``.

        Args:
            dataset_name: Dataset identifier passed to ``load_dataset``.
            split: Split name passed as ``split=``.
            to_pandas: When true, convert the loaded split with
                ``to_pandas()`` before returning it.

        Returns:
            The loaded split, or the result of its ``to_pandas()`` call if
            ``to_pandas`` is true.
        """
        # load dataset from HuggingFace hub
        dataset = load_dataset(dataset_name, split=split)
        if to_pandas:
            return dataset.to_pandas()
        return dataset