# Source: Upload_response / upload_folder_response.py
# Author: JackyChunKit
# Commit: a22749b (verified) - "Create upload_folder_response.py"
from huggingface_hub import login, HfApi
import json
import argparse
import os
def upload_json_to_hf(token, repo_id, file_path, file_name):
    """Upload a single local file to a Hugging Face dataset repository.

    Args:
        token: Hugging Face access token used to authenticate the upload.
        repo_id: Target repository ID (format: username/repo-name).
        file_path: Path of the local file to upload.
        file_name: Path the file will have inside the repository.

    Raises:
        Exception: Any error raised by the upload is reported on stdout and
            then re-raised unchanged.
    """
    # Authenticate this client only instead of calling login(): this function
    # runs once per file, and login() stores the token machine-wide and
    # re-validates it on every call.
    api = HfApi(token=token)

    try:
        api.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=file_name,
            repo_id=repo_id,
            repo_type="dataset",
        )
    except Exception as e:
        print(f"Error uploading file: {e}")
        raise
    else:
        print(f"Successfully uploaded {file_name} to {repo_id}")
def upload_folder_to_hf(token, repo_id, folder_path):
    """Upload every valid ``.json`` file under a folder to a dataset repository.

    The folder is walked recursively. Each JSON file is parsed first as a
    validity check and is uploaded under its path relative to ``folder_path``.
    Failures are reported on stdout and do not stop the remaining uploads.

    Args:
        token: Hugging Face access token, forwarded to ``upload_json_to_hf``.
        repo_id: Target repository ID (format: username/repo-name).
        folder_path: Local folder to scan for ``.json`` files.
    """
    # Collect (local path, path in repository) pairs for all JSON files.
    files = []
    for root, _, filenames in os.walk(folder_path):
        for filename in filenames:
            if not filename.endswith('.json'):  # Only process JSON files
                continue
            file_path = os.path.join(root, filename)
            relative_path = os.path.relpath(file_path, folder_path)
            # Repository paths use forward slashes; os.path.relpath yields the
            # OS-native separator (a backslash on Windows).
            repo_path = relative_path.replace(os.sep, '/')
            files.append((file_path, repo_path))

    # Validate, then upload, each file; one bad file must not abort the rest.
    for file_path, repo_path in files:
        try:
            # JSON is UTF-8 by specification; do not depend on the platform's
            # default text encoding when validating.
            with open(file_path, 'r', encoding='utf-8') as f:
                json.load(f)
            upload_json_to_hf(token, repo_id, file_path, repo_path)
        except json.JSONDecodeError:
            print(f"Skipping invalid JSON file: {file_path}")
        except Exception as e:
            print(f"Error processing {file_path}: {e}")
def main():
    """Command-line entry point: parse options, validate them, upload the folder.

    Raises:
        ValueError: No token was given via ``--token`` or the
            ``HUGGINGFACE_TOKEN`` environment variable.
        FileNotFoundError: ``--folder-path`` does not exist.
        NotADirectoryError: ``--folder-path`` exists but is not a directory.
    """
    cli = argparse.ArgumentParser(description='Upload JSON files to Hugging Face')

    # The token falls back to the environment so it need not be typed on the
    # command line.
    cli.add_argument(
        '--token',
        type=str,
        default=os.environ.get('HUGGINGFACE_TOKEN'),
        help='Hugging Face access token (or set HUGGINGFACE_TOKEN env variable)',
    )
    cli.add_argument(
        '--repo-id',
        required=True,
        type=str,
        help='Repository ID (format: username/repo-name)',
    )
    cli.add_argument(
        '--folder-path',
        required=True,
        type=str,
        help='Path to the folder containing JSON files',
    )

    options = cli.parse_args()
    token = options.token
    folder = options.folder_path

    # Fail fast, in this order: credentials first, then the folder itself.
    if not token:
        raise ValueError("Please provide a token either via --token or HUGGINGFACE_TOKEN environment variable")
    if not os.path.exists(folder):
        raise FileNotFoundError(f"Folder not found: {folder}")
    if not os.path.isdir(folder):
        raise NotADirectoryError(f"Path is not a directory: {folder}")

    upload_folder_to_hf(token, options.repo_id, folder)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()