Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| # /// script | |
| # requires-python = ">=3.12" | |
| # dependencies = [ | |
| # "assemblyai", | |
| # "requests", | |
| # ] | |
| # /// | |
| """ | |
| Script to delete all transcripts from AssemblyAI account. | |
| This script will: | |
| 1. Fetch all transcripts using the paginated API | |
| 2. Delete each transcript individually | |
| 3. Provide progress updates and error handling | |
| Usage: | |
| python delete_all_transcripts.py | |
| Set the ASSEMBLYAI_API_KEY environment variable before running. If it is | |
| not set, the script prompts for the API key interactively. | |
| """ | |
| import os | |
| import sys | |
| import time | |
| from typing import Any, Dict, List | |
| import assemblyai as aai | |
| import requests | |
def get_api_key() -> str:
    """Return the AssemblyAI API key to use for this run.

    The ``ASSEMBLYAI_API_KEY`` environment variable is consulted first. When
    it is unset or empty, the user is prompted on stdin and the answer is
    stripped of surrounding whitespace. If neither source yields a key, an
    error message is printed and the process exits with status 1.
    """
    key = os.environ.get("ASSEMBLYAI_API_KEY")
    if key:
        return key

    # Fall back to an interactive prompt when the environment has no key.
    key = input("Enter your AssemblyAI API key: ").strip()
    if key:
        return key

    print("Error: No API key provided")
    sys.exit(1)
def fetch_all_transcripts(api_key: str) -> List[Dict[str, Any]]:
    """
    Fetch all transcripts from AssemblyAI account using pagination.

    Starts at the list endpoint and keeps following the pagination link
    until there is nothing left to fetch. Per AssemblyAI's API reference the
    list is sorted newest-first and ``page_details.prev_url`` points to the
    page with *older* transcripts (``next_url`` points to newer ones), so
    ``prev_url`` is the link that walks the whole account from the first
    page.

    Pagination stops when a page contains no transcripts, when no further
    URL is provided, or when a URL repeats (guards against an endless loop).
    Transcripts whose ``id`` was already collected are not added twice.

    On any request or JSON decoding error, the error is printed and the
    process exits with status 1.

    Args:
        api_key: AssemblyAI API key, sent as the ``Authorization`` header.

    Returns:
        List of transcript dictionaries with at least 'id' field
    """
    base_url = "https://api.assemblyai.com/v2/transcript"
    headers = {"Authorization": api_key}
    all_transcripts: List[Dict[str, Any]] = []
    seen_ids = set()
    visited_urls = set()
    current_url = base_url

    print("π Fetching all transcripts...")
    while current_url and current_url not in visited_urls:
        visited_urls.add(current_url)
        try:
            # A timeout keeps the script from hanging forever on a stalled
            # connection; requests waits indefinitely by default.
            response = requests.get(current_url, headers=headers, timeout=30)
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers JSON decoding failures on older versions of
            # requests, where that error is not a RequestException.
            print(f"β Error fetching transcripts: {e}")
            sys.exit(1)

        # "or" also covers keys that are present but null in the response.
        transcripts = data.get("transcripts") or []
        if not transcripts:
            # An empty page means we have walked past the oldest transcript.
            break

        for transcript in transcripts:
            transcript_id = transcript.get("id")
            if transcript_id is not None:
                if transcript_id in seen_ids:
                    continue
                seen_ids.add(transcript_id)
            # Entries without an id are kept so the caller can report them.
            all_transcripts.append(transcript)

        page_details = data.get("page_details") or {}

        # Print progress; result_count is the size of this page, not the
        # size of the whole account.
        total_fetched = len(all_transcripts)
        page_count = page_details.get("result_count", "unknown")
        print(f" Fetched {total_fetched} transcripts (this page: {page_count})")

        # Older transcripts live behind prev_url.
        current_url = page_details.get("prev_url")

        # Small delay to be respectful to the API
        time.sleep(0.1)

    print(f"β Found {len(all_transcripts)} total transcripts")
    return all_transcripts
def delete_transcript_by_id(transcript_id: str, api_key: str) -> bool:
    """
    Delete a single transcript by ID.

    Uses the AssemblyAI SDK's ``Transcript.delete_by_id`` classmethod, which
    only needs the transcript ID. The transcript is deliberately not fetched
    first: that would cost an extra request per transcript and is not needed
    to issue the delete.

    Any exception raised by the SDK is caught and reported so that one
    failing transcript does not abort a bulk run.

    Args:
        transcript_id: ID of the transcript to delete.
        api_key: AssemblyAI API key, installed into the SDK settings.

    Returns:
        True if successful, False otherwise
    """
    try:
        # Set API key for AssemblyAI SDK
        aai.settings.api_key = api_key
        # Delete directly by ID; no prior lookup is required.
        aai.Transcript.delete_by_id(transcript_id)
    except Exception as e:  # best-effort: report and let the caller continue
        print(f" β Failed to delete {transcript_id}: {e}")
        return False
    return True
def delete_all_transcripts(transcripts: List[Dict[str, Any]], api_key: str) -> None:
    """
    Delete every transcript in ``transcripts`` after an explicit confirmation.

    Nothing happens for an empty list. Otherwise the user must type
    ``DELETE`` exactly; any other answer cancels. Each entry is deleted via
    ``delete_transcript_by_id``; entries without an ``id`` are counted as
    failures and skipped. A progress line is printed on every tenth entry
    that was actually attempted, and a summary is printed at the end.
    """
    if not transcripts:
        print("β No transcripts to delete")
        return

    total = len(transcripts)
    print(f"\nποΈ Starting deletion of {total} transcripts...")

    # Require an explicit, case-sensitive confirmation before destroying data.
    prompt = (
        f"\nβ οΈ WARNING: This will permanently delete {total} transcripts.\n"
        "Are you sure you want to continue? (type 'DELETE' to confirm): "
    )
    if input(prompt) != "DELETE":
        print("β Deletion cancelled")
        return

    deleted = 0
    failed = 0
    for position, entry in enumerate(transcripts, start=1):
        tid = entry.get("id")
        if tid:
            print(f" Deleting {position}/{total}: {tid}")
            if delete_transcript_by_id(tid, api_key):
                deleted += 1
            else:
                failed += 1

            # Progress update every 10 deletions
            if position % 10 == 0:
                print(
                    f" Progress: {position}/{total} processed ({deleted} successful, {failed} failed)"
                )

            # Small delay to be respectful to the API
            time.sleep(0.2)
        else:
            # No usable ID: nothing to send to the API, count it as failed.
            print(f" β οΈ Skipping transcript {position}: No ID found")
            failed += 1

    print("\nπ Deletion Summary:")
    print(f" β Successfully deleted: {deleted}")
    print(f" β Failed deletions: {failed}")
    print(f" π Total processed: {total}")

    if failed:
        print(
            f"β οΈ {failed} transcripts could not be deleted. Check the error messages above."
        )
    else:
        print("π All transcripts deleted successfully!")
def main():
    """Run the tool: print the banner, get the key, list, then delete."""
    print("ποΈ AssemblyAI Transcript Deletion Tool")
    print("=====================================")

    # The key is resolved once and shared by the listing and deletion steps.
    api_key = get_api_key()

    # Listing happens first so the user is shown the count before confirming.
    found = fetch_all_transcripts(api_key)
    delete_all_transcripts(found, api_key)
# Run the tool only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()