Spaces:
Runtime error
Runtime error
import json
import logging
import os
import time

import requests
import supabase
from flask import Flask, request, jsonify
# Configure logging
logging.basicConfig(level=logging.INFO)

# NOTE(review): the literal fallbacks below are credentials embedded in source.
# They are kept only so existing deployments keep working unchanged; rotate
# them, supply the values through the environment, and then delete the
# fallbacks.

# Azure Document Intelligence setup (overridable via environment variables)
AZURE_ENDPOINT = os.environ.get(
    "AZURE_ENDPOINT",
    "https://gosignpdf.cognitiveservices.azure.com/",
)
AZURE_KEY = os.environ.get(
    "AZURE_KEY",
    "2nUifMPmbS35qkiFr5OjgzDw7ooE5Piw5892GQgyWZHe0oNRIBJHJQQJ99AKACfhMk5XJ3w3AAALACOGkANC",
)

# Supabase setup (overridable via environment variables)
SUPABASE_URL = os.environ.get(
    "SUPABASE_URL",
    "https://dtzuqtvroalrjhgdcowq.supabase.co/",
)
SUPABASE_KEY = os.environ.get(
    "SUPABASE_KEY",
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImR0enVxdHZyb2FscmpoZ2Rjb3dxIiwicm9sZSI6InNlcnZpY2Vfcm9sZSIsImlhdCI6MTcyNTQ0OTczMiwiZXhwIjoyMDQxMDI1NzMyfQ.bMjGnnuYNlSEyaSWLNf_aOOebvDhFirPDr6zXjMHs64",
)

# Shared Supabase client used by the storage and table helpers in this module.
supabase_client = supabase.create_client(SUPABASE_URL, SUPABASE_KEY)

app = Flask(__name__)
def log_debug(message, **kwargs):
    """Print a ``[DEBUG]``-prefixed message, then one line per keyword detail.

    Args:
        message: Text shown after the ``[DEBUG]`` prefix.
        **kwargs: Optional name/value pairs; each is printed on its own line,
            in the order given, as `` - name: value``.
    """
    report = [f"[DEBUG] {message}"]
    report.extend(f" - {name}: {detail}" for name, detail in kwargs.items())
    print("\n".join(report))
def download_file_from_supabase(file_path):
    """Download a file from the Supabase ``files`` storage bucket.

    Args:
        file_path: Path of the object inside the ``files`` bucket.

    Returns:
        The file content as ``bytes``.

    Raises:
        Exception: If the storage client returns anything other than ``bytes``.
    """
    response = supabase_client.storage.from_("files").download(file_path)

    # A successful download is the raw file content, so there is no status
    # code to inspect.
    if isinstance(response, bytes):
        return response

    # A non-bytes result is not guaranteed to expose ``.text``; fall back to
    # its repr so the real failure is reported instead of an AttributeError.
    detail = getattr(response, "text", None)
    if detail is None:
        detail = repr(response)
    raise Exception(f"Failed to download file from Supabase: {detail}")
def analyze_pdf_layout(file_content, poll_interval=8, max_wait=300, request_timeout=60):
    """Send a PDF to Azure's prebuilt-layout model and return the layout data.

    The document is POSTed to the ``prebuilt-layout:analyze`` endpoint. The URL
    returned in the ``Operation-Location`` response header is then polled until
    the operation reports ``succeeded`` or ``failed``.

    Args:
        file_content: Raw PDF bytes sent as the request body.
        poll_interval: Seconds to sleep between status polls.
        max_wait: Maximum number of seconds to keep polling before giving up.
        request_timeout: Timeout, in seconds, applied to each HTTP request.

    Returns:
        The ``analyzeResult`` value from the final status response.

    Raises:
        Exception: If the submit request is not answered with HTTP 202, the
            ``Operation-Location`` header is missing, or the analysis reports
            ``failed``.
        TimeoutError: If the analysis has not finished after ``max_wait``
            seconds.
    """
    # AZURE_ENDPOINT may end with "/"; strip it so the path does not contain
    # "//formrecognizer".
    base_url = AZURE_ENDPOINT.rstrip("/")
    url = f"{base_url}/formrecognizer/documentModels/prebuilt-layout:analyze?api-version=2023-07-31"
    headers = {
        "Ocp-Apim-Subscription-Key": AZURE_KEY,
        "Content-Type": "application/pdf",
    }

    response = requests.post(
        url, headers=headers, data=file_content, timeout=request_timeout
    )
    if response.status_code != 202:
        raise Exception(f"Azure request failed: {response.text}")

    operation_location = response.headers.get("Operation-Location")
    if not operation_location:
        raise Exception("Operation-Location header not found in response.")

    poll_headers = {"Ocp-Apim-Subscription-Key": AZURE_KEY}
    # Bound the polling loop so a stuck operation cannot block the caller
    # forever.
    deadline = time.monotonic() + max_wait
    while True:
        result_response = requests.get(
            operation_location, headers=poll_headers, timeout=request_timeout
        )
        result = result_response.json()
        status = result.get("status")
        if status == "succeeded":
            return result["analyzeResult"]
        if status == "failed":
            raise Exception("Analysis failed.")
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Azure analysis did not finish within {max_wait} seconds."
            )
        time.sleep(poll_interval)
def analyze():
    """Analyze the layout of a stored PDF and save key measurements on its row.

    Reads ``file_id`` from the JSON request body, looks up the file's storage
    path in the Supabase ``files`` table, downloads the file, runs the Azure
    layout analysis, and writes ``page_height``, ``page_width``, ``x1`` and
    ``y4`` back to the same row. Only the first page of the analysis result is
    used.

    NOTE(review): no route decorator is visible on this function in this view;
    confirm it is registered with ``app``, otherwise it is never served.

    Returns:
        A ``(response, status)`` tuple:
        200 with the measurements on success;
        400 if ``file_id`` is missing or the body is not a JSON object;
        404 if no file row is found;
        422 if the analysis contains no pages or no words on the first page;
        500 if the update fails or any other error occurs.
    """
    try:
        # silent=True yields None for a missing or invalid JSON body instead
        # of raising, so a bad request is reported as 400 rather than 500.
        payload = request.get_json(silent=True)
        file_id = payload.get("file_id") if isinstance(payload, dict) else None
        if not file_id:
            return jsonify({"error": "File ID is required"}), 400

        # Fetch file path from Supabase
        file_data = (
            supabase_client.table("files")
            .select("file_path")
            .eq("id", file_id)
            .single()
            .execute()
        )
        if not file_data or not file_data.data:
            return jsonify({"error": "File not found or Supabase query failed"}), 404
        file_path = file_data.data["file_path"]

        # Download the file from Supabase, then analyze its layout with Azure
        file_content = download_file_from_supabase(file_path)
        layout_data = analyze_pdf_layout(file_content)

        # Only the first page is considered; guard against empty results so
        # they produce a clear error instead of an IndexError.
        pages = layout_data.get("pages") or []
        if not pages:
            return jsonify({"error": "No pages found in analyzed document"}), 422
        page_data = pages[0]
        words = page_data.get("words") or []
        if not words:
            return jsonify({"error": "No words found on the first page"}), 422

        page_height = page_data["height"]
        page_width = page_data["width"]
        x1 = words[0]["polygon"][0]  # X1 of first word
        y4 = words[-1]["polygon"][-1]  # Y4 of last word

        # Store the extracted layout values on the file's row
        update_response = supabase_client.table("files").update({
            "page_height": page_height,
            "page_width": page_width,
            "x1": x1,
            "y4": y4,
        }).eq("id", file_id).execute()

        # Report an explicit error from the client first; it is stringified
        # because the error object may not be JSON-serialisable.
        update_error = getattr(update_response, "error", None)
        if update_error:
            return jsonify({
                "error": "Failed to update file layout data",
                "details": str(update_error),
            }), 500
        # An empty data payload is treated as a failed update.
        if not update_response.data:
            return jsonify({"error": "Failed to update file layout data"}), 500

        return jsonify({
            "message": "Layout data analyzed successfully",
            "page_height": page_height,
            "page_width": page_width,
            "x1": x1,
            "y4": y4,
        }), 200
    except Exception as e:
        # Top-level boundary for the handler: log with traceback and return
        # the message to the caller as a 500.
        logging.exception("Error while analyzing file layout")
        return jsonify({"error": str(e)}), 500
# Script entry point: when run directly, serve the app with Flask's built-in
# server on all network interfaces, port 8000.
if __name__ == "__main__":
    app.run(port=8000, host="0.0.0.0")