Spaces:
Sleeping
Sleeping
| """Script to remove data after October 25, 2025 from MongoDB for testing the scraper.""" | |
| from datetime import datetime | |
| from src.agri_predict.config import get_collections | |
def remove_data_after_date(cutoff_date_str="2025-10-25"):
    """Delete every document whose "Reported Date" is later than the cutoff.

    Prints the document counts before the deletion, the number of documents
    deleted, and afterwards the remaining total together with the newest
    "Reported Date" still stored. Returns early, without deleting anything,
    when no document matches.

    Args:
        cutoff_date_str: Cutoff date in YYYY-MM-DD format. It is parsed into
            a datetime (midnight of that day) and compared with a strict
            ``$gt``, so only later values match.

    Raises:
        ValueError: If cutoff_date_str does not match the YYYY-MM-DD format.
    """
    cutoff_date = datetime.strptime(cutoff_date_str, "%Y-%m-%d")
    collection = get_collections()['collection']

    # One filter shared by the count and the delete, so both operations
    # target exactly the same set of documents.
    newer_than_cutoff = {"Reported Date": {"$gt": cutoff_date}}

    # Snapshot of the collection before anything is removed.
    total_before = collection.count_documents({})
    matching = collection.count_documents(newer_than_cutoff)

    print("π Database Status:")
    print(f" Total documents: {total_before}")
    print(f" Documents after {cutoff_date_str}: {matching}")

    # Nothing to delete: report it and leave the collection untouched.
    if matching == 0:
        print(f"β No documents found after {cutoff_date_str}")
        return

    outcome = collection.delete_many(newer_than_cutoff)

    print("\nποΈ Deletion Results:")
    print(f" Deleted {outcome.deleted_count} documents")

    # Re-read the collection to confirm what is left after the delete.
    total_after = collection.count_documents({})
    newest = collection.find_one(sort=[("Reported Date", -1)])

    print("\nβ After Deletion:")
    print(f" Total documents: {total_after}")

    if not newest:
        print(" Database is empty")
        return

    newest_date = newest.get("Reported Date")
    # A document without a (truthy) "Reported Date" is reported as 'Unknown'.
    shown = newest_date.strftime('%Y-%m-%d') if newest_date else 'Unknown'
    print(f" Latest date in database: {shown}")
if __name__ == "__main__":
    # Single source of truth for the cutoff: the banner previously
    # hard-coded "2025-10-25" while the call below used "2025-10-10", so the
    # script announced a different date than the one it actually deleted by.
    # NOTE(review): the module docstring and the function default mention
    # 2025-10-25 - confirm that 2025-10-10 is the intended cutoff here.
    cutoff = "2025-10-10"
    rule = "=" * 60

    print(rule)
    print(f"π§Ή Cleaning MongoDB Data After {cutoff}")
    print(rule + "\n")

    remove_data_after_date(cutoff)

    print("\n" + rule)
    print("β Cleanup Complete - Ready to test scraper!")
    print(rule)