def upload_to_hf_dataset(file_path, dataset_name, token, repo_type="dataset"):
    """Push a single local file to a Hugging Face Hub repository.

    The file is stored in the repository under its base name (the directory
    part of ``file_path`` is dropped). Any exception raised while uploading is
    caught and reported on stdout; the function returns nothing either way.

    Args:
        file_path (str): Path to the local file to upload.
        dataset_name (str): Repository id in the form 'username/dataset-name'.
        token (str): Hugging Face API token.
        repo_type (str): Repository type, defaults to 'dataset'.
    """
    import os

    from huggingface_hub import HfApi

    hub_client = HfApi()

    try:
        # Resolved inside the try so that an unusable ``file_path`` is reported
        # through the same error message as any other upload failure.
        remote_name = os.path.basename(file_path)
        hub_client.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=remote_name,
            repo_id=dataset_name,
            repo_type=repo_type,
            token=token,
            commit_message=f"Upload {remote_name}",
            commit_description=f"Automated upload of {remote_name} to dataset",
        )
        print(f"Successfully uploaded {file_path} to {dataset_name}")
    except Exception as err:
        print(f"Error uploading file: {err!s}")
| |
|
| |
|
def download_from_hf_dataset(
    file_path, dataset_name, token, repo_type="dataset", local_dir="."
):
    """Download one file from a Hugging Face Hub repository.

    Failures are reported on stdout instead of being raised, so callers should
    check the return value to find out whether the download succeeded.

    Args:
        file_path (str): Path of the file inside the repository.
        dataset_name (str): Repository id in the form 'username/dataset-name'.
        token (str): Hugging Face API token.
        repo_type (str): Repository type, defaults to 'dataset'.
        local_dir (str): Directory handed to the Hub client as the download
            target. Defaults to '.', the value that used to be hard-coded.

    Returns:
        The value returned by ``HfApi.hf_hub_download`` (documented by
        huggingface_hub as the local path of the downloaded file), or None
        if the download raised an exception.
    """
    from huggingface_hub import HfApi

    api = HfApi()

    try:
        local_path = api.hf_hub_download(
            repo_id=dataset_name,
            filename=file_path,
            repo_type=repo_type,
            local_dir=local_dir,
            token=token,
        )
    except Exception as e:
        # Deliberately best-effort: report the problem and signal failure with
        # None rather than propagating the exception to the caller.
        print(f"Error downloading file: {str(e)}")
        return None

    print(f"Successfully downloaded {file_path} from {dataset_name}")
    return local_path
| |
|
| |
|
def load_hf_dataset(csv_filename, token, dataset_name_input):
    """Fetch one data file of a Hugging Face dataset as a pandas DataFrame.

    The file is requested as the "train" split of the dataset and converted
    with ``to_pandas()``. Any exception raised while loading or converting is
    caught and reported on stdout.

    Args:
        csv_filename (str): Name of the data file (expected to be a CSV)
            inside the dataset repository.
        token (str): Hugging Face authentication token.
        dataset_name_input (str): Dataset identifier passed to
            ``datasets.load_dataset``.

    Returns:
        pandas.DataFrame: The loaded data, or None if loading failed.
    """
    from datasets import load_dataset

    try:
        train_split = load_dataset(
            dataset_name_input,
            data_files=csv_filename,
            split="train",
            token=token,
        )
        # Converted inside the try so conversion errors are reported the same
        # way as download/parsing errors.
        frame = train_split.to_pandas()
    except Exception as err:
        print(f"Error loading dataset: {err}")
        return None
    return frame
| |
|