Spaces:

samwaugh
/

ArteFact

Sleeping

App Files Files Community

ArteFact / test_optimized_download.py

samwaugh

Try to speed up markdown download

33b499e 2 months ago

raw

history blame contribute delete

2.95 kB

	#!/usr/bin/env python3
	"""
	Test script for the optimized markdown download functionality.
	This script can be run to test the new parallel download approach.
	"""

	import os
	import sys
	import time
	from pathlib import Path

	# The project code lives in a "backend" folder next to this script; putting
	# it at the front of sys.path lets `runner.*` be imported from here.
	backend_dir = Path(__file__).parent.joinpath("backend")
	sys.path.insert(0, os.fspath(backend_dir))

	def test_optimized_download() -> bool:
	    """Run the optimized markdown download once and report the outcome.

	    Steps, as performed below:

	    1. Import the cache helpers and ``WRITE_ROOT`` from ``runner.config``.
	    2. Clear the markdown cache and print the cache info.
	    3. Call ``_download_markdown_optimized`` on
	       ``WRITE_ROOT / "markdown_cache" / "works"`` and time the call.
	    4. On success, print the refreshed cache info and the download rate.

	    Returns:
	        True if the download returned a truthy result whose ``exists()`` is
	        true; False if it did not, or if any exception was raised along the
	        way (the traceback is printed, the exception is not propagated).
	    """
	    try:
	        # Resolve every project import before touching the cache, so that a
	        # broken import cannot leave the cache cleared with nothing
	        # downloaded to replace it.
	        from runner.config import (
	            WRITE_ROOT,
	            _download_markdown_optimized,
	            clear_markdown_cache,
	            get_markdown_cache_info,
	        )

	        print("🧪 Testing optimized markdown download...")

	        # Clear any existing cache so the download starts from scratch.
	        print("🗑️ Clearing existing cache...")
	        clear_markdown_cache()

	        # Check cache info before download
	        print("📊 Cache info before download:")
	        cache_info_before = get_markdown_cache_info()
	        print(f" Exists: {cache_info_before['exists']}")
	        print(f" Works: {cache_info_before['work_count']}")
	        print(f" Size: {cache_info_before['size_gb']}GB")

	        print("\n🚀 Starting optimized download...")
	        works_dir = WRITE_ROOT / "markdown_cache" / "works"

	        # perf_counter() is monotonic, so the measured duration cannot be
	        # skewed (or go negative) if the system clock is adjusted mid-run.
	        start_time = time.perf_counter()
	        result = _download_markdown_optimized(works_dir)
	        duration = time.perf_counter() - start_time

	        if not (result and result.exists()):
	            print("❌ Download failed")
	            return False

	        print(f"\n✅ Download completed successfully in {duration:.2f} seconds")

	        # Check cache info after download
	        print("📊 Cache info after download:")
	        cache_info_after = get_markdown_cache_info()
	        print(f" Exists: {cache_info_after['exists']}")
	        print(f" Works: {cache_info_after['work_count']}")
	        print(f" Size: {cache_info_after['size_gb']}GB")
	        print(f" Files: {cache_info_after['file_count']}")

	        # Guard against a zero-length interval before dividing.
	        if duration > 0:
	            works_per_second = cache_info_after['work_count'] / duration
	            print(f"📈 Download rate: {works_per_second:.2f} works/second")

	        return True

	    except Exception as e:
	        # Top-level boundary of a diagnostic script: report everything and
	        # turn it into a failed result instead of crashing.
	        print(f"❌ Test failed with error: {e}")
	        import traceback
	        traceback.print_exc()
	        return False

	if __name__ == "__main__":
	    # Banner for the console run.
	    print("🧪 ArteFact Optimized Download Test")
	    print("=" * 50)

	    # Map the boolean outcome to a process exit status (0 = ok, 1 = failed).
	    if test_optimized_download():
	        print("\n🎉 Test completed successfully!")
	        exit_status = 0
	    else:
	        print("\n💥 Test failed!")
	        exit_status = 1

	    sys.exit(exit_status)