agripredict / remove_recent_data.py
ThejasRao's picture
Upload 5 files
3029a46 verified
"""Script to remove data after October 25, 2025 from MongoDB for testing the scraper."""
from datetime import datetime
from src.agri_predict.config import get_collections
def remove_data_after_date(cutoff_date_str="2025-10-25"):
    """Delete every document whose "Reported Date" is later than the cutoff.

    Prints the document counts before the deletion, the number of documents
    deleted, and the count plus newest "Reported Date" remaining afterwards.
    Returns early, without deleting anything, when no document matches.

    Args:
        cutoff_date_str: Cutoff date string in format YYYY-MM-DD. It is parsed
            into a naive ``datetime`` (midnight of that day) and compared with
            ``$gt``, so only documents strictly later than that instant match.

    Raises:
        ValueError: If ``cutoff_date_str`` is not in YYYY-MM-DD format.
    """
    cutoff_date = datetime.strptime(cutoff_date_str, "%Y-%m-%d")

    cols = get_collections()
    collection = cols['collection']

    # One filter shared by the count and the delete, so both operations are
    # guaranteed to target exactly the same set of documents.
    after_cutoff_filter = {"Reported Date": {"$gt": cutoff_date}}

    # Count documents before deletion
    before_count = collection.count_documents({})
    after_cutoff_count = collection.count_documents(after_cutoff_filter)

    print("📊 Database Status:")
    print(f" Total documents: {before_count}")
    print(f" Documents after {cutoff_date_str}: {after_cutoff_count}")

    if after_cutoff_count == 0:
        print(f"✅ No documents found after {cutoff_date_str}")
        return

    # Delete documents
    result = collection.delete_many(after_cutoff_filter)
    print("\n🗑️ Deletion Results:")
    print(f" Deleted {result.deleted_count} documents")

    # Verify deletion: report what is left and the newest remaining date.
    remaining_count = collection.count_documents({})
    latest_doc = collection.find_one(sort=[("Reported Date", -1)])

    print("\n✅ After Deletion:")
    print(f" Total documents: {remaining_count}")

    if latest_doc:
        latest_date = latest_doc.get("Reported Date")
        if not latest_date:
            latest_label = "Unknown"
        elif hasattr(latest_date, "strftime"):
            latest_label = latest_date.strftime("%Y-%m-%d")
        else:
            # The field is not a datetime (e.g. stored as a string); show it
            # verbatim instead of crashing after the deletion already ran.
            latest_label = str(latest_date)
        print(f" Latest date in database: {latest_label}")
    else:
        print(" Database is empty")
if __name__ == "__main__":
    # Single source of truth for the cutoff: the banner previously announced
    # 2025-10-25 while the call actually deleted everything after 2025-10-10.
    # NOTE(review): the function default is 2025-10-25 - confirm which date
    # is intended; the value actually passed before this change is kept here.
    cutoff = "2025-10-10"
    banner = "=" * 60

    print(banner)
    print(f"🧹 Cleaning MongoDB Data After {cutoff}")
    print(banner + "\n")

    remove_data_after_date(cutoff)

    print("\n" + banner)
    print("✅ Cleanup Complete - Ready to test scraper!")
    print(banner)