# docling-processor / test-scripts / test_docstrange.py
# arjunbhargav212's picture
# Upload 12 files
# ad5d213 verified
"""
Test DocStrange Hugging Face API
Usage: python test_docstrange.py <HF_API_URL>
"""
import sys
import requests
import json
import os
# The API base URL is the single required command-line argument; without it,
# show the usage text and exit with status 1.
try:
    # Trailing slashes are stripped so endpoint paths can be appended with "/".
    HF_URL = sys.argv[1].rstrip('/')
except IndexError:
    print("Usage: python test_docstrange.py <HF_API_URL>")
    print("Example: python test_docstrange.py https://your-username-docstrange.hf.space")
    sys.exit(1)

# Banner: a 60-character rule above and below the target URL.
print("\n" + "=" * 60)
print(f"Testing DocStrange API: {HF_URL}")
print("=" * 60 + "\n")
# Test 1: Health check
# GET the API root and require a successful status with a JSON body. Any
# failure here aborts the run with exit status 1, since the later steps need
# a reachable API.
print("1. Testing health check...")
try:
    # requests applies no timeout by default, so an unreachable host would
    # otherwise block this script indefinitely.
    resp = requests.get(f"{HF_URL}/", timeout=30)
    print(f" Status: {resp.status_code}")
    # Turn 4xx/5xx answers into an exception so they are reported as a
    # failure instead of being announced as a passed health check.
    resp.raise_for_status()
    print(f" Response: {resp.json()}")
    print(" ✅ Health check passed!\n")
except Exception as e:
    # Top-level boundary of a CLI script: report whatever went wrong
    # (connection error, timeout, HTTP error, non-JSON body) and stop.
    print(f" ❌ Failed: {e}\n")
    sys.exit(1)
# Fixture check: the extraction step uploads a local PDF, so look for it first.
test_pdf = "test.pdf"
if not os.path.exists(test_pdf):
    # Nothing to upload: point the user at the fixture and the API docs, then
    # stop with status 0.
    print(
        "⚠️ No test.pdf found. Please add a test PDF to this directory.",
        f" Or check API docs at: {HF_URL}/docs",
        sep="\n",
    )
    sys.exit(0)
# Test 2: Full extraction
# POST the local PDF to the /extract endpoint, summarise the JSON reply, print
# the closing banner, and exit non-zero if the extraction did not succeed.
print(f"2. Testing document extraction with {test_pdf}...")
extraction_ok = False  # set only after a 200 reply has been parsed and shown
try:
    with open(test_pdf, 'rb') as f:
        resp = requests.post(
            f"{HF_URL}/extract",
            files={"file": f},
            timeout=120,
        )
    # The upload is finished here, so the file handle is already closed.
    print(f" Status: {resp.status_code}")
    if resp.status_code == 200:
        data = resp.json()
        print(" ✅ Success!")
        print(f" File: {data.get('file_name')}")
        print(f" Format: {data.get('format')}")
        print(f" Metadata: {json.dumps(data.get('metadata', {}), indent=2)}")
        # Preview data: a text payload is truncated to 200 characters, a
        # mapping is summarised by its keys; other payload types are not shown.
        doc_data = data.get('data', {})
        if isinstance(doc_data, str):
            print("\n Preview (first 200 chars):")
            print(f" {doc_data[:200]}...")
        elif isinstance(doc_data, dict):
            print(f"\n Data keys: {list(doc_data.keys())}")
        extraction_ok = True
    else:
        print(f" ❌ Failed: {resp.text}\n")
except Exception as e:
    # Top-level boundary of a CLI script: report the error (I/O, network,
    # malformed JSON, unexpected payload shape) rather than a traceback.
    print(f" ❌ Failed: {e}\n")

print(f"\n{'='*60}")
print("Test complete!")
print(f"{'='*60}\n")

# Mirror the health check: a failed step must be visible in the exit status
# so shells and CI jobs can detect it. The banner above is printed first.
if not extraction_ok:
    sys.exit(1)