Spaces:
Running
Running
File size: 2,174 Bytes
59aaeae c04ffe5 59aaeae c04ffe5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
"""
Utility function for processing files with OCR in the Historical OCR Workshop app.
"""
import os
import tempfile
from pathlib import Path
from datetime import datetime
def process_file(uploaded_file, use_vision=True, processor=None, custom_prompt=None):
    """Run OCR on an uploaded file and return the results with metadata.

    The upload's bytes are written to a temporary file that keeps the
    original file extension, the temporary path is handed to
    ``processor.process_file``, and the temporary file is removed afterwards
    whether or not processing succeeded.

    Args:
        uploaded_file: Upload object exposing a ``name`` attribute and a
            ``getvalue()`` method returning the file content as bytes.
        use_vision: Whether to use vision model (forwarded to the processor).
        processor: Object providing ``process_file``. If None, a
            ``structured_ocr.StructuredOCR`` is imported and instantiated.
        custom_prompt: Optional additional instructions for the model.

    Returns:
        dict: On success, the dict returned by the processor, updated in
        place with ``file_name``, ``processed_at`` (``datetime.now()`` in
        ISO format), ``file_size_mb`` (rounded to two decimals) and
        ``use_vision``. If processing raises, a dict holding only ``error``
        (the exception message) and ``file_name``.

    Raises:
        Exception: Failures while importing/constructing the default
            processor, reading the upload, or writing the temporary file
            are not converted into an error dict; they propagate.
    """
    # Import the processor lazily so callers that inject their own do not
    # need the structured_ocr module to be importable.
    if processor is None:
        from structured_ocr import StructuredOCR
        processor = StructuredOCR()

    suffix = Path(uploaded_file.name).suffix

    # Read the upload before touching the disk: if this raises, no temporary
    # file has been created yet, so nothing can be left behind.
    payload = uploaded_file.getvalue()

    temp_path = None
    try:
        # delete=False because the processor opens the file by path after
        # this handle has been closed; removal happens in the finally below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            temp_path = tmp.name
            tmp.write(payload)

        try:
            # Determine file type from extension
            file_type = "pdf" if suffix.lower() == ".pdf" else "image"

            # Get file size in MB
            file_size_mb = os.path.getsize(temp_path) / (1024 * 1024)

            # Process the file with file size information for automatic
            # page limiting
            result = processor.process_file(
                temp_path,
                file_type=file_type,
                use_vision=use_vision,
                file_size_mb=file_size_mb,
                custom_prompt=custom_prompt,
            )

            # Add processing metadata
            result.update({
                "file_name": uploaded_file.name,
                "processed_at": datetime.now().isoformat(),
                "file_size_mb": round(file_size_mb, 2),
                "use_vision": use_vision,
            })
            return result
        except Exception as e:
            # Processing failures are reported to the caller as data rather
            # than raised.
            return {
                "error": str(e),
                "file_name": uploaded_file.name,
            }
    finally:
        # Clean up the temporary file, including when writing it failed.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except FileNotFoundError:
                # Already gone (e.g. removed during processing); nothing
                # left to clean up.
                pass
|