# Spaces: Running
import json
import logging
import os
import time

import requests
import supabase
from flask import Flask, request, jsonify
# Configure logging
logging.basicConfig(level=logging.INFO)

# SECURITY: the literal fallbacks below are credentials committed to source
# control. Each setting can now be supplied through an environment variable of
# the same name; the literals are kept only so existing deployments keep
# starting. TODO: rotate the committed keys and delete the fallbacks.

# Azure Document Intelligence setup
AZURE_ENDPOINT = os.environ.get(
    "AZURE_ENDPOINT",
    "https://gosignpdf.cognitiveservices.azure.com/",
)
AZURE_KEY = os.environ.get(
    "AZURE_KEY",
    "2nUifMPmbS35qkiFr5OjgzDw7ooE5Piw5892GQgyWZHe0oNRIBJHJQQJ99AKACfhMk5XJ3w3AAALACOGkANC",
)

# Supabase setup
SUPABASE_URL = os.environ.get(
    "SUPABASE_URL",
    "https://dtzuqtvroalrjhgdcowq.supabase.co/",
)
SUPABASE_KEY = os.environ.get(
    "SUPABASE_KEY",
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImR0enVxdHZyb2FscmpoZ2Rjb3dxIiwicm9sZSI6ImFub24iLCJpYXQiOjE3MjU0NDk3MzIsImV4cCI6MjA0MTAyNTczMn0.WrIvwEOq4CqCb8IkU8G4jiWkf9DM1JxGd2_aTN4vlV4",
)

# Shared Supabase client used for both table queries and storage downloads.
supabase_client = supabase.create_client(SUPABASE_URL, SUPABASE_KEY)

# Flask application object served by the script entry point.
app = Flask(__name__)
def log_debug(message, **kwargs):
    """Print a ``[DEBUG]`` line for *message*, then one line per keyword detail.

    Args:
        message: Text shown after the ``[DEBUG]`` prefix.
        **kwargs: Optional named values; each is printed on its own
            ``- name: value`` line, in the order given.
    """
    print(f"[DEBUG] {message}")
    # Iterating an empty mapping is a no-op, so no explicit emptiness check.
    for name, detail in kwargs.items():
        print(f" - {name}: {detail}")
def download_file_from_supabase(file_path):
    """Download a file from the Supabase ``files`` storage bucket.

    Args:
        file_path: Path of the object inside the ``files`` bucket.

    Returns:
        The file content as ``bytes``.

    Raises:
        Exception: If the storage client returns anything other than bytes.
    """
    response = supabase_client.storage.from_("files").download(file_path)

    # A successful download is the raw file content; there is no status code
    # to inspect on this path.
    if isinstance(response, bytes):
        return response

    # The non-bytes value is not guaranteed to expose ``.text``; fall back to
    # its repr so building the error message cannot itself raise
    # AttributeError and mask the real failure.
    detail = getattr(response, "text", None)
    if detail is None:
        detail = repr(response)
    raise Exception(f"Failed to download file from Supabase: {detail}")
def analyze_pdf_layout(file_content, poll_interval=8, max_wait=300, request_timeout=30):
    """Send a PDF to Azure and return its layout analysis.

    Posts the document to the ``prebuilt-layout`` model, then polls the URL
    given in the ``Operation-Location`` response header until the reported
    status is ``succeeded`` or ``failed``, or until ``max_wait`` elapses.

    Args:
        file_content: PDF content sent as the request body.
        poll_interval: Seconds to sleep between status polls.
        max_wait: Upper bound, in seconds, on the total time spent polling.
        request_timeout: Timeout, in seconds, applied to each HTTP request.

    Returns:
        The ``analyzeResult`` value from the final polling response.

    Raises:
        Exception: If the submission is not answered with HTTP 202, the
            ``Operation-Location`` header is missing, or Azure reports the
            analysis as failed.
        TimeoutError: If no terminal status is seen within ``max_wait``.
    """
    # The configured endpoint may end in "/"; strip it so the request path
    # does not begin with a double slash.
    base_url = AZURE_ENDPOINT.rstrip("/")
    url = f"{base_url}/formrecognizer/documentModels/prebuilt-layout:analyze?api-version=2023-07-31"
    headers = {
        "Ocp-Apim-Subscription-Key": AZURE_KEY,
        "Content-Type": "application/pdf",
    }

    response = requests.post(
        url, headers=headers, data=file_content, timeout=request_timeout
    )
    if response.status_code != 202:
        raise Exception(f"Azure request failed: {response.text}")

    operation_location = response.headers.get("Operation-Location")
    if not operation_location:
        raise Exception("Operation-Location header not found in response.")

    poll_headers = {"Ocp-Apim-Subscription-Key": AZURE_KEY}
    # Bound the polling: without a deadline, a job that never reaches a
    # terminal status would block the calling request forever.
    deadline = time.monotonic() + max_wait
    while True:
        result_response = requests.get(
            operation_location, headers=poll_headers, timeout=request_timeout
        )
        result = result_response.json()
        status = result.get("status")

        if status == "succeeded":
            return result["analyzeResult"]
        if status == "failed":
            raise Exception("Analysis failed.")

        # Any other status (or none at all) is treated as "still pending".
        if time.monotonic() + poll_interval > deadline:
            raise TimeoutError(
                f"Azure analysis did not finish within {max_wait} seconds "
                f"(last status: {status!r})."
            )
        time.sleep(poll_interval)
def _extract_layout_metrics(layout_data):
    """Return page size and text-extent coordinates from a layout result.

    Only the first page is inspected (single-page PDFs are assumed).

    Raises:
        ValueError: If the result has no pages, or the first page has no words.
    """
    pages = layout_data.get("pages") or []
    if not pages:
        raise ValueError("No pages found in the analyzed document")

    page_data = pages[0]
    words = page_data.get("words") or []
    if not words:
        raise ValueError("No words found on the first page of the document")

    return {
        "page_height": page_data["height"],
        "page_width": page_data["width"],
        "x1": words[0]["polygon"][0],    # X1 of first word
        "y4": words[-1]["polygon"][-1],  # Y4 of last word
    }


# NOTE(review): no @app.route decorator is visible for this view in the
# source under review, so it is not registered with ``app`` here — confirm
# the intended URL rule and HTTP method and register it.
def analyze():
    """Analyze a stored PDF's layout and persist the measurements.

    Reads ``file_id`` from the JSON request body, looks up the file's storage
    path in the ``files`` table, downloads the file, runs Azure layout
    analysis on it, writes ``page_height``, ``page_width``, ``x1`` and ``y4``
    back to the same row, and returns those values.

    Returns:
        A ``(response, status)`` pair:
        400 if ``file_id`` is missing or the body is not a JSON object,
        404 if the file row is not found,
        422 if the analysis contains no pages or no words,
        500 if the row update fails or any other error occurs,
        200 with the measurements on success.
    """
    # silent=True yields None for a missing or malformed JSON body, so a bad
    # request becomes a 400 below rather than an exception reported as a 500.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}
    file_id = payload.get("file_id")
    if not file_id:
        return jsonify({"error": "File ID is required"}), 400

    try:
        # Fetch file path from Supabase
        file_data = (
            supabase_client.table("files")
            .select("file_path")
            .eq("id", file_id)
            .single()
            .execute()
        )
        if not file_data or not file_data.data:
            return jsonify({"error": "File not found or Supabase query failed"}), 404
        file_path = file_data.data["file_path"]

        # Download the file, then analyze its layout with Azure
        file_content = download_file_from_supabase(file_path)
        layout_data = analyze_pdf_layout(file_content)

        try:
            metrics = _extract_layout_metrics(layout_data)
        except ValueError as exc:
            # A document with no pages or no recognized words cannot yield
            # coordinates; report that explicitly instead of an IndexError.
            return jsonify({"error": str(exc)}), 422

        # Persist the layout measurements on the file's row
        update_response = (
            supabase_client.table("files")
            .update(metrics)
            .eq("id", file_id)
            .execute()
        )

        # Check for an explicit error first: an errored response typically
        # carries no data, so testing for empty data first would hide the
        # error details.
        update_error = getattr(update_response, "error", None)
        if update_error:
            return jsonify({
                "error": "Failed to update file layout data",
                "details": update_error,
            }), 500
        if not update_response.data:
            return jsonify({"error": "Failed to update file layout data"}), 500

        return jsonify({
            "message": "Layout data analyzed successfully",
            **metrics,
        }), 200
    except Exception as e:
        # Record the traceback server-side; the client only gets the message.
        logging.exception("Layout analysis failed for file_id=%s", file_id)
        return jsonify({"error": str(e)}), 500
# Script entry point: start the Flask server on all interfaces, port 8000,
# when this file is executed directly.
if __name__ == "__main__":
    app.run(port=8000, host="0.0.0.0")