"""Manual smoke test for the PDF text-extraction endpoints on localhost:8000.

Usage: python test_pdf_endpoint.py <pdf_file> [page_numbers]
"""
import asyncio
import base64
import os
import sys

import aiohttp


async def _report_response(response):
    """Print the status code and a short summary of one extraction response.

    Args:
        response: The ``aiohttp.ClientResponse`` returned by the server.
    """
    try:
        result = await response.json()
    except (aiohttp.ContentTypeError, ValueError):
        # Error pages are often HTML or plain text. Show the raw body instead
        # of letting a decode failure be misreported as a connection problem.
        result = await response.text()

    print(f"Status code: {response.status}")
    if response.status != 200:
        print(f"Error: {result}")
        return

    # A 200 with an unexpected body should be reported as such rather than
    # surfacing as a KeyError/TypeError.
    if (
        not isinstance(result, dict)
        or "filename" not in result
        or not isinstance(result.get("text"), str)
    ):
        print(f"Unexpected response format: {result}")
        return

    text = result["text"]
    print(f"Successfully extracted text from {result['filename']}")
    print(f"Text length: {len(text)} characters")
    # Print first 500 characters of extracted text
    print(f"First 500 characters: {text[:500]}")


async def _post_and_report(url, **request_kwargs):
    """POST to *url* in a fresh session and print the outcome.

    Failures are printed rather than raised so that one failing endpoint
    does not prevent the next endpoint from being exercised.

    Args:
        url: Endpoint URL to POST to.
        **request_kwargs: Passed through to ``ClientSession.post``.
    """
    try:
        async with aiohttp.ClientSession() as session:
            async with session.post(url, **request_kwargs) as response:
                await _report_response(response)
    except (aiohttp.ClientError, asyncio.TimeoutError) as e:
        print(f"Error connecting to server: {e}")
        print("Make sure the FastAPI server is running on port 8000")
    except Exception as e:
        # Script-level boundary: keep going, but do not blame the connection.
        print(f"Unexpected error: {e}")


async def test_pdf_extraction():
    """Send a PDF to both extraction endpoints and print the results.

    Reads the PDF path from ``sys.argv[1]`` and an optional page-number
    string from ``sys.argv[2]``. The file is posted once as a multipart
    upload to ``/extract-text`` and once as a base64 JSON payload to
    ``/extract-text-base64``. All outcomes are printed; nothing is returned.
    """
    # Check if filename is provided as argument
    if len(sys.argv) < 2:
        print("Usage: python test_pdf_endpoint.py <pdf_file> [page_numbers]")
        return

    pdf_filename = sys.argv[1]
    page_numbers = sys.argv[2] if len(sys.argv) > 2 else None

    # Read the PDF file
    try:
        with open(pdf_filename, 'rb') as f:
            pdf_content = f.read()
    except FileNotFoundError:
        print(f"Error: File '{pdf_filename}' not found.")
        return
    except OSError as e:
        print(f"Error reading file: {e}")
        return

    # Send only the base name: the server needs a label, not the local path.
    upload_name = os.path.basename(pdf_filename)

    # Test regular file upload endpoint
    print("\n--- Testing file upload endpoint ---")
    form = aiohttp.FormData()
    form.add_field(
        'file',
        pdf_content,
        filename=upload_name,
        content_type='application/pdf',
    )
    # Let aiohttp build the query string so the value is URL-encoded.
    params = {"page_numbers": page_numbers} if page_numbers else None
    await _post_and_report(
        "http://localhost:8000/extract-text",
        data=form,
        params=params,
    )

    # Test base64 endpoint
    print("\n--- Testing base64 endpoint ---")
    payload = {
        "file": base64.b64encode(pdf_content).decode('utf-8'),
        "filename": upload_name,
    }
    if page_numbers:
        payload["page_numbers"] = page_numbers
    await _post_and_report(
        "http://localhost:8000/extract-text-base64",
        json=payload,
    )


if __name__ == "__main__":
    asyncio.run(test_pdf_extraction())