Spaces:
Sleeping
Sleeping
File size: 3,271 Bytes
63069dd 1a755c0 63069dd 1a755c0 63069dd 1a755c0 63069dd 1a755c0 63069dd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import asyncio
import aiohttp
import sys
import base64
async def test_pdf_extraction():
    """Exercise both PDF text-extraction endpoints of the local server.

    Command-line arguments:
        sys.argv[1]: path of the PDF file to send (required).
        sys.argv[2]: page selection forwarded verbatim to the server
            (optional).

    The PDF is sent twice: once as a multipart file upload to
    ``/extract-text`` and once as a base64 string inside a JSON body to
    ``/extract-text-base64``. Progress and results are printed; the
    coroutine always returns ``None``.
    """
    # Check if filename is provided as argument
    if len(sys.argv) < 2:
        print("Usage: python test_pdf_endpoint.py <pdf_filename> [page_numbers]")
        return

    pdf_filename = sys.argv[1]
    page_numbers = sys.argv[2] if len(sys.argv) > 2 else None

    # Read the PDF file
    try:
        with open(pdf_filename, 'rb') as f:
            pdf_content = f.read()
    except FileNotFoundError:
        print(f"Error: File '{pdf_filename}' not found.")
        return
    except Exception as e:
        print(f"Error reading file: {e}")
        return

    base_url = "http://localhost:8000"

    async with aiohttp.ClientSession() as session:
        # Test regular file upload endpoint
        print("\n--- Testing file upload endpoint ---")
        form = aiohttp.FormData()
        form.add_field(
            'file',
            pdf_content,
            filename=pdf_filename,
            content_type='application/pdf',
        )
        # Pass the page selection via ``params`` so it is URL-encoded
        # instead of being spliced into the URL by hand.
        query = {"page_numbers": page_numbers} if page_numbers else None
        await _post_and_report(
            session, f"{base_url}/extract-text", data=form, params=query
        )

        # Test base64 endpoint
        print("\n--- Testing base64 endpoint ---")
        payload = {
            "file": base64.b64encode(pdf_content).decode('utf-8'),
            "filename": pdf_filename,
        }
        if page_numbers:
            payload["page_numbers"] = page_numbers
        await _post_and_report(
            session, f"{base_url}/extract-text-base64", json=payload
        )


async def _post_and_report(session, url, **request_kwargs):
    """POST to *url* with *session* and print a summary of the response.

    ``request_kwargs`` are forwarded unchanged to ``session.post``. Every
    failure is reported on stdout rather than raised, so a problem with one
    endpoint does not prevent the next one from being tested.
    """
    try:
        async with session.post(url, **request_kwargs) as response:
            # Report the status first so it is visible even when the body
            # turns out not to be JSON.
            print(f"Status code: {response.status}")
            try:
                # content_type=None skips aiohttp's MIME-type check, so only
                # a body that is not valid JSON lands in the except branch.
                result = await response.json(content_type=None)
            except ValueError:
                body = await response.text()
                print(f"Error: response is not valid JSON: {body[:500]}")
                return

            if response.status != 200:
                print(f"Error: {result}")
                return

            try:
                filename = result['filename']
                text = result['text']
            except (KeyError, TypeError):
                # Empty body (parsed as None) or a payload missing the keys.
                print(f"Error: unexpected response payload: {result}")
                return

            print(f"Successfully extracted text from {filename}")
            print(f"Text length: {len(text)} characters")
            # Print first 500 characters of extracted text
            print(f"First 500 characters: {text[:500]}")
    except (aiohttp.ClientError, asyncio.TimeoutError) as e:
        print(f"Error connecting to server: {e}")
        print("Make sure the FastAPI server is running on port 8000")
    except Exception as e:
        # Keep the script best-effort, but do not mislabel the failure as a
        # connection problem.
        print(f"Unexpected error: {e}")
# Run the endpoint checks only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(test_pdf_extraction())