Spaces:

IDK100boysaj
/

coeditxl

Sleeping

App Files Files Community

coeditxl / app.py

IDK100boysaj

Update app.py

026aefc verified 17 days ago

raw

history blame contribute delete

27.4 kB


	from fastapi import FastAPI, HTTPException, Request
	from fastapi.middleware.cors import CORSMiddleware
	from fastapi.responses import JSONResponse
	from pydantic import BaseModel, Field, field_validator, ConfigDict
	from contextlib import asynccontextmanager
	from optimum.onnxruntime import ORTModelForSeq2SeqLM
	from transformers import AutoTokenizer
	from difflib import SequenceMatcher
	from slowapi import Limiter, _rate_limit_exceeded_handler
	from slowapi.util import get_remote_address
	from slowapi.errors import RateLimitExceeded
	import onnxruntime as ort
	import uvicorn
	import os
	import time
	import logging
	from typing import List, Tuple
	import re
	import uuid
	from datetime import datetime

	os.environ["CUDA_VISIBLE_DEVICES"] = ""

	# Configure logging
	logging.basicConfig(
	level=logging.INFO,
	format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
	)
	logger = logging.getLogger(__name__)

	# Global model variables
	model = None
	tokenizer = None
	start_time = None # Track server uptime

	# Configuration
	VERSION = "1.0.0"
	MODEL_PATH = "IDK100boysaj/coedit-xl-onnx-8bit"
	MAX_INPUT_TOKENS = 512 # CoEdit-XL hard limit (including prompt)
	MAX_OUTPUT_TOKENS = 512 # Fixed at 512 as requested
	OVERLAP_SENTENCES = 2 # Fixed 2-sentence overlap for adaptive merging
	MAX_TEXT_LENGTH = 50000 # Character limit to prevent memory issues
	BATCH_SIZE = 2 # Conservative batch size for 2vCPU + 16GB RAM

	# Rate limiting configuration
	limiter = Limiter(key_func=get_remote_address)
	RATE_LIMIT = "100/minute" # 100 requests per minute per IP

	@asynccontextmanager
	async def lifespan(app: FastAPI):
	"""Load model on startup, cleanup on shutdown"""
	global model, tokenizer, start_time

	start_time = datetime.now()
	logger.info(f"Starting CoEdit-XL API v{VERSION}...")
	logger.info(f"Loading CoEdit-XL quantized ONNX model from {MODEL_PATH}...")

	try:
	# Configure ONNX Runtime session options for 2vCPU optimization
	session_options = ort.SessionOptions()
	session_options.intra_op_num_threads = 2 # Match vCPU count
	session_options.inter_op_num_threads = 1 # Single thread for inter-op
	session_options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL # Sequential execution
	session_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL

	# Disable thread spinning to save CPU cycles on wait
	session_options.add_session_config_entry("session.intra_op.allow_spinning", "0")
	session_options.add_session_config_entry("session.inter_op.allow_spinning", "0")

	logger.info("Configured ONNX session: 2 intra-op threads, no spinning, sequential execution")

	# Load quantized ONNX model with Optimum
	# Explicitly specify file names for non-standard naming
	model = ORTModelForSeq2SeqLM.from_pretrained(
	MODEL_PATH,
	provider="CPUExecutionProvider", # Force CPU execution
	session_options=session_options, # Add optimized session options
	encoder_file_name="encoder_model.onnx",
	decoder_file_name="decoder_model.onnx",
	decoder_with_past_file_name="decoder_with_past_model.onnx",
	)

	# Load tokenizer
	tokenizer = AutoTokenizer.from_pretrained(
	MODEL_PATH,
	)

	logger.info("✓ Quantized ONNX model loaded successfully on CPU")
	logger.info(f"Optimizations: Sentence-based chunking \| Fixed {OVERLAP_SENTENCES}-sentence overlap \| Batch size: {BATCH_SIZE}")
	logger.info(f"Adaptive merging with 0.9 similarity threshold")
	logger.info(f"Max input tokens: {MAX_INPUT_TOKENS} \| Max output tokens: {MAX_OUTPUT_TOKENS}")

	except Exception as e:
	logger.error(f"Failed to load model: {str(e)}")
	raise RuntimeError(f"Model loading failed: {str(e)}")

	yield # Server runs here

	# Cleanup on shutdown
	logger.info("Shutting down...")
	model = None
	tokenizer = None

	app = FastAPI(
	title="CoEdit-XL Grammar Correction API",
	version=VERSION,
	lifespan=lifespan,
	description="High-performance grammar correction API using quantized CoEdit-XL with rolling window support",
	)

	# Add rate limiting
	app.state.limiter = limiter

	@app.exception_handler(RateLimitExceeded)
	async def rate_limit_handler(request: Request, exc: RateLimitExceeded):
	"""Custom rate limit exceeded handler"""
	return JSONResponse(
	status_code=429,
	content={
	"detail": "Rate limit exceeded. Maximum 100 requests per minute.",
	"retry_after": "60 seconds"
	}
	)

	# CORS middleware for cross-origin requests
	app.add_middleware(
	CORSMiddleware,
	allow_origins=["*"], # Configure for production
	allow_credentials=True,
	allow_methods=["*"],
	allow_headers=["*"],
	)

	class CorrectionRequest(BaseModel):
	model_config = ConfigDict(
	json_schema_extra={
	"example": {
	"text": "When I grow up, I start to understand what he said is quite right.",
	"prompt": "Fix grammatical errors in this sentence:"
	}
	}
	)

	text: str = Field(..., min_length=1, max_length=MAX_TEXT_LENGTH, description="Text to correct")
	prompt: str = Field("Fix grammatical errors in this sentence:", description="Task prompt (e.g., 'Fix grammatical errors:', 'Make this text more formal:', 'Paraphrase this:')")

	@field_validator('text')
	@classmethod
	def text_not_empty(cls, v: str) -> str:
	if not v.strip():
	raise ValueError('Text cannot be empty or only whitespace')
	return v

	@field_validator('prompt')
	@classmethod
	def prompt_not_empty(cls, v: str) -> str:
	if not v.strip():
	raise ValueError('Prompt cannot be empty or only whitespace')
	return v.strip()

	class CorrectionResponse(BaseModel):
	original: str
	corrected: str
	num_chunks: int
	processing_time_seconds: float
	changes_made: bool
	model: str = "CoEdit-XL (Quantized ONNX)"

	def split_into_complete_sentences(text: str) -> List[str]:
	"""
	Split text into complete sentences with proper punctuation handling.

	Handles:
	- Sentence terminators: . ? ! (and standalone semicolons)
	- Abbreviations: Dr., Mr., Mrs., Ms., Ph.D., etc., i.e., e.g., vs., Inc., Ltd.
	- Semicolons in lists (doesn't split on these)
	- Preserves punctuation with each sentence

	Examples:
	"Dr. Smith likes apples; oranges; milk. He is happy!"
	→ ["Dr. Smith likes apples; oranges; milk.", "He is happy!"]

	"I love it. They are great!"
	→ ["I love it.", "They are great!"]
	"""
	# Common abbreviations that end with period (not sentence boundaries)
	abbreviations = [
	'Dr', 'Mr', 'Mrs', 'Ms', 'Prof', 'Sr', 'Jr',
	'Ph.D', 'M.D', 'B.A', 'M.A', 'Ph.D.', 'M.D.', 'B.A.', 'M.A.',
	'etc', 'vs', 'Inc', 'Ltd', 'Co', 'Corp',
	'i.e', 'e.g', 'a.m', 'p.m', 'U.S', 'U.K',
	'i.e.', 'e.g.', 'a.m.', 'p.m.', 'U.S.', 'U.K.'
	]

	# Split on sentence boundaries: [.!?] followed by space and capital letter
	# But check it's not an abbreviation first
	sentences = []
	current = []
	words = text.split()

	for i, word in enumerate(words):
	current.append(word)

	# Check if this word ends with a sentence terminator
	if word and word[-1] in '.!?':
	# Check if it's an abbreviation
	is_abbrev = any(word.rstrip('.!?').endswith(abbr.rstrip('.')) for abbr in abbreviations)

	# If not abbreviation and next word starts with capital, it's a sentence boundary
	if not is_abbrev and i + 1 < len(words) and words[i + 1] and words[i + 1][0].isupper():
	sentences.append(' '.join(current))
	current = []

	# Add remaining words
	if current:
	sentences.append(' '.join(current))

	# Handle semicolons that act as sentence separators (not in lists)
	# A semicolon is a separator if followed by space and capital letter
	result = []
	for sent in sentences:
	# Check if this sentence contains standalone semicolons (sentence separators)
	# Pattern: semicolon followed by space and capital letter, not in a list context
	# List context: has colons before semicolons (e.g., "buy: apples; oranges")
	if ';' in sent:
	# Check if it's a list (has colon before semicolons)
	colon_pos = sent.find(':')
	semicolon_positions = [i for i, c in enumerate(sent) if c == ';']

	# If semicolons come after a colon, it's likely a list - don't split
	is_list = colon_pos >= 0 and any(sp > colon_pos for sp in semicolon_positions)

	if not is_list:
	# Split on semicolons followed by space and capital
	subsents = re.split(r';\s+(?=[A-Z])', sent)
	# Add semicolon back to each part except last
	for i, ss in enumerate(subsents[:-1]):
	result.append(ss + ';')
	if subsents[-1].strip():
	result.append(subsents[-1])
	else:
	result.append(sent)
	else:
	result.append(sent)

	# Clean up and filter
	return [s.strip() for s in result if s.strip()]

	def chunk_by_fixed_overlap(text: str, tokenizer, max_tokens: int = MAX_INPUT_TOKENS, overlap_sentences: int = OVERLAP_SENTENCES) -> Tuple[List[str], List[int]]:
	"""
	Split text into chunks with FIXED sentence overlap (not percentage).

	Returns:
	chunks: List of text chunks
	overlap_counts: List indicating how many sentences in each chunk overlap from previous chunk

	Example with overlap_sentences=2:
	Sentences: ["A.", "B.", "C.", "D.", "E."]
	Chunks:
	- Chunk 0: ["A.", "B.", "C."] → overlap_count=0
	- Chunk 1: ["B.", "C.", "D."] → overlap_count=2
	- Chunk 2: ["C.", "D.", "E."] → overlap_count=2
	"""
	# Tokenize full text to count tokens
	tokens = tokenizer.encode(text, add_special_tokens=False)

	if len(tokens) <= max_tokens:
	return ([text], [0])

	# Split into sentences first (better for CoEdit context)
	sentences = split_into_complete_sentences(text)

	if len(sentences) == 0:
	return ([text], [0])

	chunks = []
	overlap_counts = []
	sentence_idx = 0

	while sentence_idx < len(sentences):
	current_chunk_sentences = []
	current_tokens = 0
	chunk_start_idx = sentence_idx

	# Determine overlap count for this chunk
	if len(chunks) == 0:
	# First chunk has no overlap
	overlap_count = 0
	else:
	# Subsequent chunks: go back by overlap_sentences
	overlap_count = min(overlap_sentences, sentence_idx)
	chunk_start_idx = sentence_idx - overlap_count

	# Build chunk starting from chunk_start_idx
	for i in range(chunk_start_idx, len(sentences)):
	sentence = sentences[i]
	sentence_tokens = tokenizer.encode(sentence, add_special_tokens=False)
	sentence_token_count = len(sentence_tokens)

	# Check if adding this sentence would exceed token limit
	if current_tokens + sentence_token_count > max_tokens and current_chunk_sentences:
	# Chunk is full, stop here
	break

	current_chunk_sentences.append(sentence)
	current_tokens += sentence_token_count

	# If this is a new sentence (not overlap), advance the index
	if i >= sentence_idx:
	sentence_idx = i + 1

	# Save the chunk
	if current_chunk_sentences:
	chunks.append(' '.join(current_chunk_sentences))
	overlap_counts.append(overlap_count)
	else:
	# Edge case: single sentence too long, split by words
	long_sentence = sentences[chunk_start_idx]
	words = long_sentence.split()
	word_chunk = []
	word_tokens = 0

	for word in words:
	word_token_count = len(tokenizer.encode(word, add_special_tokens=False))
	if word_tokens + word_token_count > max_tokens and word_chunk:
	chunks.append(' '.join(word_chunk))
	overlap_counts.append(0) # Word-level chunks don't have sentence overlap
	word_chunk = []
	word_tokens = 0

	word_chunk.append(word)
	word_tokens += word_token_count

	if word_chunk:
	chunks.append(' '.join(word_chunk))
	overlap_counts.append(0)

	sentence_idx += 1

	return (chunks, overlap_counts)

	def merge_with_adaptive_overlap(
	corrected_chunks: List[str],
	overlap_counts: List[int],
	similarity_threshold: float = 0.9
	) -> str:
	"""
	Merge corrected chunks using adaptive overlap strategy.

	Compares overlapping sentences and:
	- If similarity >= threshold: Keep previous version (consistency)
	- If similarity < threshold: Use current version (better context)

	Args:
	corrected_chunks: List of corrected text chunks
	overlap_counts: Number of sentences in each chunk that overlap from previous
	similarity_threshold: Minimum similarity to keep previous version (default 0.9)

	Returns:
	Merged corrected text

	Example:
	Chunk 1: "I like apples. They are delicious."
	Chunk 2 (overlap=1): "They are delicious. My friend agrees."
	→ If both have "They are delicious." (similarity=1.0), keep chunk 1's version
	→ Add "My friend agrees."
	"""
	if len(corrected_chunks) == 0:
	return ""

	if len(corrected_chunks) == 1:
	return corrected_chunks[0]

	# Start with first chunk (no overlap)
	result_sentences = split_into_complete_sentences(corrected_chunks[0])

	for chunk_idx in range(1, len(corrected_chunks)):
	current_chunk = corrected_chunks[chunk_idx]
	overlap_count = overlap_counts[chunk_idx]

	# Split current chunk into sentences
	current_sentences = split_into_complete_sentences(current_chunk)

	if overlap_count == 0:
	# No overlap, just append all sentences
	result_sentences.extend(current_sentences)
	continue

	# Compare overlapping sentences
	# Get last N sentences from result (where N = overlap_count)
	comparison_count = min(overlap_count, len(result_sentences))
	previous_overlap_sents = result_sentences[-comparison_count:] if comparison_count > 0 else []

	# Get first N sentences from current chunk
	current_overlap_sents = current_sentences[:min(overlap_count, len(current_sentences))]

	# Compare each overlapping sentence
	sentences_to_replace = []
	for i in range(min(len(previous_overlap_sents), len(current_overlap_sents))):
	prev_sent = previous_overlap_sents[i]
	curr_sent = current_overlap_sents[i]

	# Calculate similarity
	similarity = SequenceMatcher(None, prev_sent.lower(), curr_sent.lower()).ratio()

	logger.debug(f"Comparing overlap {i+1}: similarity={similarity:.3f}")
	logger.debug(f" Previous: {prev_sent[:80]}...")
	logger.debug(f" Current: {curr_sent[:80]}...")

	if similarity < similarity_threshold:
	# Current version is significantly different, use it (better context)
	sentences_to_replace.append((i, curr_sent))
	logger.debug(f" → Using current version (better context)")
	else:
	# Keep previous version (consistency)
	logger.debug(f" → Keeping previous version (consistent)")

	# Replace sentences if needed
	for idx, new_sent in sentences_to_replace:
	result_idx = len(result_sentences) - comparison_count + idx
	if 0 <= result_idx < len(result_sentences):
	result_sentences[result_idx] = new_sent

	# Add non-overlapping sentences from current chunk
	non_overlap_start = min(overlap_count, len(current_sentences))
	if non_overlap_start < len(current_sentences):
	result_sentences.extend(current_sentences[non_overlap_start:])

	return ' '.join(result_sentences)

	def correct_text(
	text: str,
	prompt: str = "Fix grammatical errors in this sentence:"
	) -> Tuple[str, int]:
	"""
	Correct text using CoEdit-XL with rolling window approach.
	Returns: (corrected_text, num_chunks)

	CoEdit format: "{prompt} {text}"
	Example prompts:
	- "Fix grammatical errors in this sentence:"
	- "Make this text more formal:"
	- "Paraphrase this:"
	- "Improve the clarity of this text:"
	"""
	if not text.strip():
	return text, 0

	# Safety check - should never happen due to endpoint guard
	if tokenizer is None or model is None:
	raise RuntimeError("Model or tokenizer not initialized")

	# Ensure prompt ends with space or colon for proper formatting
	if not prompt.endswith((' ', ':')):
	prompt = prompt + ' '
	elif prompt.endswith(':') and not prompt.endswith(': '):
	prompt = prompt + ' '

	# Calculate max tokens for text content (reserve tokens for prompt)
	prompt_tokens = len(tokenizer.encode(prompt, add_special_tokens=False))
	max_text_tokens = MAX_INPUT_TOKENS - prompt_tokens - 2 # -2 for special tokens

	if max_text_tokens < 100:
	logger.warning(f"Prompt is too long ({prompt_tokens} tokens), may affect chunking")
	max_text_tokens = 100 # Minimum text tokens

	# Split text into manageable chunks with FIXED sentence overlap
	original_chunks, overlap_counts = chunk_by_fixed_overlap(text, tokenizer, max_text_tokens, overlap_sentences=OVERLAP_SENTENCES)
	logger.info(f"Split text into {len(original_chunks)} chunks with fixed {OVERLAP_SENTENCES}-sentence overlap")
	logger.info(f"Overlap counts: {overlap_counts}")

	corrected_chunks = []

	# Process chunks in batches for better performance
	batch_size = min(BATCH_SIZE, len(original_chunks))

	for batch_start in range(0, len(original_chunks), batch_size):
	batch_end = min(batch_start + batch_size, len(original_chunks))
	batch = original_chunks[batch_start:batch_end]

	logger.info(f"Processing batch {batch_start//batch_size + 1} (chunks {batch_start+1}-{batch_end}/{len(original_chunks)})")

	try:
	# Prepare inputs with user-provided prompt (CRITICAL: prompt repeats for each chunk!)
	prompted_batch = [f"{prompt}{chunk}" for chunk in batch]

	# Tokenize batch
	inputs = tokenizer(
	prompted_batch,
	return_tensors="pt",
	padding=True,
	truncation=True,
	max_length=MAX_INPUT_TOKENS
	)

	# Generate corrections with fixed max_length=512
	outputs = model.generate(
	**inputs,
	max_length=MAX_OUTPUT_TOKENS,
	early_stopping=True,
	)

	# Decode outputs (prompt is removed by skip_special_tokens=True)
	corrected_batch = tokenizer.batch_decode(outputs, skip_special_tokens=True)
	corrected_chunks.extend(corrected_batch)

	except Exception as e:
	logger.error(f"Error processing batch: {str(e)}")
	# Fallback to original chunks if correction fails
	corrected_chunks.extend(batch)

	# Merge corrected chunks with adaptive overlap strategy
	final_corrected = merge_with_adaptive_overlap(corrected_chunks, overlap_counts, similarity_threshold=0.9)

	return final_corrected, len(original_chunks)

	@app.get("/")
	async def root():
	"""Health check endpoint"""
	return {
	"status": "online",
	"service": "CoEdit-XL Grammar Correction API",
	"model": "CoEdit-XL (Quantized ONNX)",
	"model_path": MODEL_PATH,
	"max_input_tokens": MAX_INPUT_TOKENS,
	"max_output_tokens": MAX_OUTPUT_TOKENS,
	"overlap_sentences": OVERLAP_SENTENCES,
	"adaptive_merging": True,
	"similarity_threshold": 0.9,
	"max_text_length": MAX_TEXT_LENGTH,
	"example_prompts": [
	"Fix grammatical errors in this sentence:",
	"Make this text more formal:",
	"Paraphrase this:",
	"Improve the clarity of this text:",
	"Neutralize this text:",
	"Make this easier to understand:"
	],
	"endpoints": {
	"correction": "POST /correct",
	"health": "GET /health",
	}
	}

	@app.get("/health")
	async def health():
	"""Enhanced health check with uptime and system metrics"""
	try:
	import psutil

	uptime_seconds = (datetime.now() - start_time).total_seconds() if start_time else 0
	cpu_percent = psutil.cpu_percent(interval=0.1)
	mem = psutil.virtual_memory()

	return {
	"status": "healthy",
	"version": VERSION,
	"uptime_seconds": round(uptime_seconds, 2),
	"uptime_human": str(datetime.now() - start_time).split('.')[0] if start_time else "N/A",
	"model_loaded": model is not None,
	"tokenizer_loaded": tokenizer is not None,
	"device": "CPU (ONNX Runtime)",
	"configuration": {
	"max_input_tokens": MAX_INPUT_TOKENS,
	"max_output_tokens": MAX_OUTPUT_TOKENS,
	"overlap_sentences": OVERLAP_SENTENCES,
	"max_text_length": MAX_TEXT_LENGTH,
	"batch_size": BATCH_SIZE,
	"rate_limit": RATE_LIMIT
	},
	"system": {
	"cpu_usage_percent": cpu_percent,
	"memory_used_gb": round(mem.used / (1024**3), 2),
	"memory_available_gb": round(mem.available / (1024**3), 2),
	"memory_percent": mem.percent
	}
	}
	except ImportError:
	uptime_seconds = (datetime.now() - start_time).total_seconds() if start_time else 0
	return {
	"status": "healthy",
	"version": VERSION,
	"uptime_seconds": round(uptime_seconds, 2),
	"uptime_human": str(datetime.now() - start_time).split('.')[0] if start_time else "N/A",
	"model_loaded": model is not None,
	"tokenizer_loaded": tokenizer is not None,
	"device": "CPU (ONNX Runtime)",
	"configuration": {
	"max_input_tokens": MAX_INPUT_TOKENS,
	"max_output_tokens": MAX_OUTPUT_TOKENS,
	"overlap_sentences": OVERLAP_SENTENCES,
	"max_text_length": MAX_TEXT_LENGTH,
	"batch_size": BATCH_SIZE,
	"rate_limit": RATE_LIMIT
	},
	"note": "Install psutil for system metrics"
	}

	@app.post("/correct", response_model=CorrectionResponse)
	@limiter.limit(RATE_LIMIT)
	async def correct_grammar(request_obj: Request, request: CorrectionRequest):
	"""
	Correct/edit text using CoEdit-XL with custom prompts.
	Handles texts longer than 512 tokens using rolling window approach.
	Rate limited to 100 requests per minute per IP.

	Example prompts:
	- "Fix grammatical errors in this sentence:"
	- "Make this text more formal:"
	- "Paraphrase this:"
	- "Improve the clarity of this text:"
	"""
	request_id = str(uuid.uuid4())[:8]

	if model is None or tokenizer is None:
	logger.error(f"[{request_id}] Model not loaded")
	raise HTTPException(
	status_code=503,
	detail="Service temporarily unavailable. Model not loaded."
	)

	# Validate text length (Pydantic already checks, but double-check for safety)
	if len(request.text) > MAX_TEXT_LENGTH:
	logger.warning(f"[{request_id}] Text too large: {len(request.text)} chars")
	raise HTTPException(
	status_code=413,
	detail=f"Text too large. Maximum allowed: {MAX_TEXT_LENGTH} characters."
	)

	try:
	start_time_req = time.time()
	logger.info(f"[{request_id}] Correction request \| Text: {len(request.text)} chars \| Prompt: '{request.prompt[:50]}...'")

	corrected, num_chunks = correct_text(
	request.text,
	request.prompt
	)

	processing_time = time.time() - start_time_req
	changes_made = corrected.strip() != request.text.strip()

	logger.info(f"[{request_id}] Completed in {processing_time:.2f}s \| Chunks: {num_chunks} \| Changes: {changes_made}")

	return CorrectionResponse(
	original=request.text,
	corrected=corrected,
	num_chunks=num_chunks,
	processing_time_seconds=round(processing_time, 3),
	changes_made=changes_made
	)

	except ValueError as e:
	# Input validation errors
	logger.warning(f"[{request_id}] Validation error: {str(e)}")
	raise HTTPException(
	status_code=400,
	detail="Invalid input. Please check your text and prompt."
	)
	except MemoryError:
	logger.error(f"[{request_id}] Memory error during processing")
	raise HTTPException(
	status_code=413,
	detail="Text too complex to process. Try shorter text."
	)
	except Exception as e:
	# Catch-all for unexpected errors - sanitize the error message
	logger.error(f"[{request_id}] Correction failed: {str(e)}", exc_info=True)
	raise HTTPException(
	status_code=500,
	detail="An internal error occurred. Please try again later."
	)

	@app.exception_handler(Exception)
	async def global_exception_handler(request, exc):
	"""Global exception handler for unexpected errors"""
	logger.error(f"Unhandled exception: {str(exc)}", exc_info=True)
	return JSONResponse(
	status_code=500,
	content={"detail": "Internal server error occurred"}
	)

	if __name__ == "__main__":
	# Run server on port 8080
	# For production, use reverse proxy (nginx) on ports 80/443 -> 8080
	uvicorn.run(
	app,
	host="0.0.0.0",
	port=8080,
	log_level="info",
	access_log=True,
	)

	import subprocess
	import time

	# Wait for server to start
	time.sleep(5)

	# Test if server responds
	try:
	result = subprocess.run(
	["curl", "-f", "http://localhost:8080"],
	capture_output=True,
	timeout=5
	)
	print(f"Self-ping status: {result.returncode}")
	print(f"Output: {result.stdout.decode()}")
	except Exception as e:
	print(f"Self-ping failed: {e}")