import boto3
import os
from pathlib import Path
from dotenv import load_dotenv, find_dotenv
# Load environment variables from .env file
load_dotenv(find_dotenv(".env"))
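
# The .env file is assumed to provide the keys read via os.getenv below:
#   AWS_ACCESS_KEY_ID=...
#   AWS_SECRET_ACCESS_KEY=...
#   AWS_REGION=...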


class S3Handler:
    def __init__(self, bucket_name):
        self.bucket_name = bucket_name
        self.s3 = boto3.client(
            "s3",
            aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
            aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
            region_name=os.getenv("AWS_REGION"),
        )
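        # If these environment variables are unset, the arguments above are None
        # and boto3 typically falls back to its default credential chain
        # (shared credentials file, IAM role, etc.).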

    def upload_folder(self, source_folder, dest_folder, filenames=None):
        """
        Upload specified files or all files from a local folder to an S3 folder.

        Args:
            source_folder (str): Local source folder path.
            dest_folder (str): Destination folder path in S3.
            filenames (list): List of filenames to upload (relative to source_folder). If None, uploads all files.
        """
        source_folder = Path(source_folder)
        # Select files based on filenames list, or all files if filenames is None
        files_to_upload = (
            [source_folder / file for file in filenames]
            if filenames
            else list(source_folder.rglob("*"))
        )
        for file_path in files_to_upload:
            if file_path.is_file():
                s3_path = f"{dest_folder}/{file_path.relative_to(source_folder)}"
                self.s3.upload_file(str(file_path), self.bucket_name, s3_path)
                print(f"Uploaded: {file_path} to {s3_path}")
            else:
                print(f"File not found: {file_path}")

    def download_folder(self, s3_folder, dest_folder):
        """
        Download all files from an S3 folder to a local folder.

        Args:
            s3_folder (str): Source folder in S3.
            dest_folder (str): Local destination folder path.
        """
        dest_folder = Path(dest_folder).resolve()
        paginator = self.s3.get_paginator("list_objects_v2")
        for page in paginator.paginate(Bucket=self.bucket_name, Prefix=s3_folder):
            for obj in page.get("Contents", []):
                s3_path = obj["Key"]
                # Skip folder placeholder keys if returned by S3
                if s3_path.endswith("/"):
                    continue
                # Compute relative path and local destination
                relative_path = Path(s3_path[len(s3_folder) :].lstrip("/"))
                local_path = dest_folder / relative_path
                # Create necessary local directories
                local_path.parent.mkdir(parents=True, exist_ok=True)
                # Download file
                self.s3.download_file(self.bucket_name, s3_path, str(local_path))
                print(f"Downloaded: {s3_path} to {local_path}")


# Usage Example
if __name__ == "__main__":
    # Initialize with bucket name
    s3_handler = S3Handler(bucket_name="deep-bucket-s3")

    # Upload every file under the local folder (no filenames list given)
    s3_handler.upload_folder(
        "checkpoints_test",
        "checkpoints_test",
    )

    # Download example
    s3_handler.download_folder("checkpoints_test", "checkpoints_test")
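
    # Optional sketch: upload only selected files by passing a filenames list.
    # The names below are hypothetical placeholders, not files that necessarily
    # exist under checkpoints_test.
    # s3_handler.upload_folder(
    #     "checkpoints_test",
    #     "checkpoints_test",
    #     filenames=["model.ckpt", "config.yaml"],
    # )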