# Spaces: Sleeping
| import asyncio | |
| import aiohttp | |
| import sys | |
| import base64 | |
async def _report_extraction_response(response):
    """Print the status code and a short summary of one extraction response.

    ``response`` is the aiohttp response object yielded by ``session.post``.
    The status code is printed first so it is visible even when the body
    cannot be decoded.
    """
    print(f"Status code: {response.status}")
    try:
        result = await response.json()
    except (aiohttp.ContentTypeError, ValueError):
        # Error responses are not always JSON; show the raw body instead of
        # misreporting the failure as a connection problem.
        body = await response.text(errors="replace")
        print(f"Error: server returned a non-JSON response: {body[:500]}")
        return

    if response.status != 200:
        print(f"Error: {result}")
        return

    try:
        filename = result['filename']
        text = result['text']
    except (KeyError, TypeError):
        print(f"Error: unexpected response payload: {result}")
        return

    print(f"Successfully extracted text from {filename}")
    print(f"Text length: {len(text)} characters")
    # Print first 500 characters of extracted text
    print(f"First 500 characters: {text[:500]}")


async def _post_and_report(session, url, **request_kwargs):
    """POST to ``url`` with ``request_kwargs`` and print the outcome.

    Transport-level failures are reported and swallowed so that the caller
    can continue with the next endpoint.
    """
    try:
        async with session.post(url, **request_kwargs) as response:
            await _report_extraction_response(response)
    except (aiohttp.ClientError, asyncio.TimeoutError) as e:
        print(f"Error connecting to server: {e}")
        print("Make sure the FastAPI server is running on port 8000")


async def test_pdf_extraction():
    """Exercise the two text-extraction endpoints with a PDF from the CLI.

    Reads ``sys.argv[1]`` as the PDF path and the optional ``sys.argv[2]``
    as a ``page_numbers`` value, then posts the file to
    ``/extract-text`` (multipart upload) and ``/extract-text-base64``
    (JSON payload) on ``http://localhost:8000`` and prints a summary of each
    response. Returns ``None``; all results are reported on stdout.
    """
    # Check if filename is provided as argument
    if len(sys.argv) < 2:
        print("Usage: python test_pdf_endpoint.py <pdf_filename> [page_numbers]")
        return

    pdf_filename = sys.argv[1]
    page_numbers = sys.argv[2] if len(sys.argv) > 2 else None

    # Read the PDF file
    try:
        with open(pdf_filename, 'rb') as f:
            pdf_content = f.read()
    except FileNotFoundError:
        print(f"Error: File '{pdf_filename}' not found.")
        return
    except OSError as e:
        print(f"Error reading file: {e}")
        return

    # One session serves both requests.
    async with aiohttp.ClientSession() as session:
        # Test regular file upload endpoint
        print("\n--- Testing file upload endpoint ---")
        form = aiohttp.FormData()
        form.add_field(
            'file',
            pdf_content,
            filename=pdf_filename,
            content_type='application/pdf',
        )
        # Pass the query via ``params`` so the value is URL-encoded.
        query = {"page_numbers": page_numbers} if page_numbers else None
        await _post_and_report(
            session,
            "http://localhost:8000/extract-text",
            data=form,
            params=query,
        )

        # Test base64 endpoint
        print("\n--- Testing base64 endpoint ---")
        payload = {
            "file": base64.b64encode(pdf_content).decode('utf-8'),
            "filename": pdf_filename,
        }
        if page_numbers:
            payload["page_numbers"] = page_numbers
        await _post_and_report(
            session,
            "http://localhost:8000/extract-text-base64",
            json=payload,
        )
if __name__ == "__main__":
    # Script entry point: build the coroutine, then drive it to completion
    # on a fresh event loop.
    extraction_run = test_pdf_extraction()
    asyncio.run(extraction_run)