Spaces:
Runtime error
Runtime error
| import modal | |
| import os | |
| import urllib.parse | |
# Modal application object; everything below is defined against it.
app = modal.App("fix-csv-filenames")

# Base container image (Debian slim, no extra packages added here).
image = modal.Image.debian_slim()

# Volumes, looked up by name.
census_volume = modal.Volume.from_name("census-data")
economy_volume = modal.Volume.from_name("economy-labor-data")
def clean_filename(filename: str) -> str:
    """Strip Content-Disposition residue from a downloaded file's name.

    The name is URL-decoded, every known header prefix found at the front
    is removed (checked in the order listed, one after another), and
    surrounding quotes and spaces are trimmed.

    Prefix matching ignores letter case, so ``attachment; filename*=utf-8''``
    or ``Attachment; filename=`` are stripped the same way as the exact
    spellings listed below.

    Args:
        filename: Raw file name, possibly percent-encoded.

    Returns:
        The cleaned name. A name without any recognised prefix is returned
        URL-decoded and trimmed only.
    """
    # 1. Unquote URL encoding
    # e.g. attachment%3B%20filename*%3DUTF-8%27%27a01e... -> attachment; filename*=UTF-8''a01e...
    cleaned = urllib.parse.unquote(filename)

    # 2. Remove common garbage prefixes (most specific first, so the longer
    #    header forms are consumed before the bare "attachment;" fallback).
    prefixes = (
        "attachment; filename*=UTF-8''",
        "attachment; filename=",
        "attachment;",
    )
    for prefix in prefixes:
        # Slice first, then lower-case: the comparison stays aligned with the
        # original characters even if lower-casing would change the length.
        if cleaned[:len(prefix)].lower() == prefix.lower():
            cleaned = cleaned[len(prefix):]

    # 3. Clean up any remaining quotes or whitespace
    return cleaned.strip('"\' ')
def process_volume(volume_path: str, volume_obj: modal.Volume) -> dict:
    """Rename CSV files under ``volume_path`` to their cleaned names.

    Walks the directory tree, passes every file name through
    ``clean_filename`` and renames the file in place when the cleaned name
    differs. ``volume_obj.commit()`` is called once after the walk.

    Args:
        volume_path: Directory to scan recursively.
        volume_obj: Volume object whose ``commit()`` is called at the end.

    Returns:
        ``{"renamed": <int>, "errors": <int>}``. Files skipped because the
        target name already exists are counted in neither field.
    """
    renamed_count = 0
    errors = 0
    print(f"Scanning {volume_path}...")

    for root, _, files in os.walk(volume_path):
        for filename in files:
            new_name = clean_filename(filename)

            # Filter on the cleaned name: a raw name may end in a quote or
            # an encoded quote (e.g. ...filename="x.csv") and would be
            # missed by a check on the raw name.
            if not new_name.lower().endswith('.csv'):
                continue
            if new_name == filename:
                continue

            # URL-decoding can introduce path separators (%2F); refuse
            # anything that would resolve outside the current directory.
            if os.path.basename(new_name) != new_name:
                print(f"Skipping rename {filename} -> {new_name} (Unsafe target name)")
                errors += 1
                continue

            old_path = os.path.join(root, filename)
            new_path = os.path.join(root, new_name)

            # Avoid overwriting an existing file
            if os.path.exists(new_path):
                print(f"Skipping rename {filename} -> {new_name} (Target exists)")
                continue

            try:
                os.rename(old_path, new_path)
            except (OSError, ValueError) as e:
                # ValueError covers names with an embedded NUL byte (%00).
                print(f"Error renaming {filename}: {e}")
                errors += 1
            else:
                renamed_count += 1

    volume_obj.commit()
    return {"renamed": renamed_count, "errors": errors}
# Registered as a Modal function so that ``fix_census.remote()`` exists; the
# census volume is mounted at the same path that is handed to process_volume.
@app.function(image=image, volumes={"/data/census": census_volume})
def fix_census():
    """Clean CSV file names under /data/census.

    Returns:
        The ``{"renamed": ..., "errors": ...}`` dict from ``process_volume``.
    """
    return process_volume("/data/census", census_volume)
# Registered as a Modal function so that ``fix_economy.remote()`` exists; the
# economy volume is mounted at the same path that is handed to process_volume.
@app.function(image=image, volumes={"/data/economy": economy_volume})
def fix_economy():
    """Clean CSV file names under /data/economy.

    Returns:
        The ``{"renamed": ..., "errors": ...}`` dict from ``process_volume``.
    """
    return process_volume("/data/economy", economy_volume)
@app.local_entrypoint()
def main():
    """Run both filename fixes remotely, one after the other, and print a summary of each."""
    print("Fixing Census filenames...")
    census_res = fix_census.remote()
    print(f"Census: Renamed {census_res['renamed']} files. Errors: {census_res['errors']}")

    print("Fixing Economy filenames...")
    economy_res = fix_economy.remote()
    print(f"Economy: Renamed {economy_res['renamed']} files. Errors: {economy_res['errors']}")