import logging
from flask import Flask, request, jsonify, render_template
from services.scraper_service import ScraperService
from services.llm_service import LLMService
from services.scheduler_service import scheduler_service
from utils.rate_limiter import RateLimiter
from api.horoscope_routes import horoscope_bp

logger = logging.getLogger(__name__)

# Create services
scraper_service = ScraperService()
llm_service = LLMService()

# API-wide rate limiter (10 requests per minute)
api_rate_limiter = RateLimiter(window_size=60, max_requests=10)
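
# RateLimiter's interface, as used below, appears to be a sliding window:
# can_proceed() checks whether another request fits in the current window,
# record_request() timestamps a request, and get_wait_time() returns how many
# seconds to wait before retrying. A minimal sketch of that assumed contract:
#
#     class RateLimiter:
#         def __init__(self, window_size: int, max_requests: int): ...
#         def can_proceed(self) -> bool: ...
#         def record_request(self) -> None: ...
#         def get_wait_time(self) -> float: ...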


def register_routes(app: Flask) -> None:
    """Register API routes with Flask app"""
    # Register blueprints
    app.register_blueprint(horoscope_bp)

    # Start scheduler service
    scheduler_service.start()

    # NOTE: the route paths below are inferred from the handler names and may
    # need adjusting to the app's actual URL scheme.

    @app.route('/ping')
    def ping():
        return "pong", 200

    @app.route('/')
    def index():
        """Home page with API documentation"""
        return render_template('index.html')

    @app.route('/docs')
    def docs():
        """Detailed API documentation"""
        return render_template('docs.html')

    @app.route('/health')
    def health_check():
        """Health check endpoint"""
        return jsonify({
            "status": "ok",
            "services": {
                "scraper": "up",
                "llm": "up" if llm_service.api_key else "down",
                "scheduler": "up" if scheduler_service.running else "down"
            }
        })
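
    # Example response, assuming all services are available:
    #   {"status": "ok",
    #    "services": {"scraper": "up", "llm": "up", "scheduler": "up"}}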

    @app.route('/api/scrape', methods=['POST'])
    def scrape_endpoint():
        """Endpoint to scrape a single URL"""
        # Check rate limit
        if not api_rate_limiter.can_proceed():
            return jsonify({
                "error": "Rate limit exceeded",
                "wait_seconds": api_rate_limiter.get_wait_time()
            }), 429

        # Record request for rate limiting
        api_rate_limiter.record_request()

        # Get URL from request (silent=True returns None instead of raising
        # on a missing or malformed JSON body)
        data = request.get_json(silent=True)
        if not data or 'url' not in data:
            return jsonify({"error": "Missing 'url' in request"}), 400

        url = data['url']
        scraper_type = data.get('type')  # Optional scraper type

        # Perform scraping
        result = scraper_service.scrape_url(url, scraper_type)
        return jsonify(result)
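
    # Example request; 'type' is optional and its accepted values depend on
    # ScraperService, so the value shown here is illustrative:
    #   POST /api/scrape
    #   {"url": "https://example.com/article", "type": "html"}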

    @app.route('/api/scrape-multiple', methods=['POST'])
    def scrape_multiple_endpoint():
        """Endpoint to scrape multiple URLs"""
        # Check rate limit
        if not api_rate_limiter.can_proceed():
            return jsonify({
                "error": "Rate limit exceeded",
                "wait_seconds": api_rate_limiter.get_wait_time()
            }), 429

        # Record request for rate limiting
        api_rate_limiter.record_request()

        # Get URLs from request
        data = request.get_json(silent=True)
        if not data or 'urls' not in data:
            return jsonify({"error": "Missing 'urls' in request"}), 400

        urls = data['urls']
        if not isinstance(urls, list) or not urls:
            return jsonify({"error": "'urls' must be a non-empty list"}), 400

        # Limit number of URLs to prevent abuse
        if len(urls) > 10:
            return jsonify({"error": "Maximum 10 URLs allowed per request"}), 400

        # Perform scraping
        results = scraper_service.scrape_multiple_urls(urls)
        return jsonify({"results": results})

    @app.route('/api/consolidate', methods=['POST'])
    def consolidate_endpoint():
        """Endpoint to consolidate data using LLM"""
        # Check rate limit
        if not api_rate_limiter.can_proceed():
            return jsonify({
                "error": "Rate limit exceeded",
                "wait_seconds": api_rate_limiter.get_wait_time()
            }), 429

        # Record request for rate limiting
        api_rate_limiter.record_request()

        # Get data from request
        data = request.get_json(silent=True)
        if not data or 'scraped_data' not in data:
            return jsonify({"error": "Missing 'scraped_data' in request"}), 400

        scraped_data = data['scraped_data']
        if not isinstance(scraped_data, list) or not scraped_data:
            return jsonify({"error": "'scraped_data' must be a non-empty list"}), 400

        # Consolidate data using LLM
        result = llm_service.consolidate_data(scraped_data)
        return jsonify(result)
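
    # Example request; each item's fields are whatever ScraperService emits,
    # so those shown here are illustrative:
    #   POST /api/consolidate
    #   {"scraped_data": [{"url": "https://example.com/a", "content": "..."}]}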

    @app.route('/api/scrape-and-consolidate', methods=['POST'])
    def scrape_and_consolidate_endpoint():
        """Endpoint to scrape URLs and consolidate the data in one request"""
        # Check rate limit
        if not api_rate_limiter.can_proceed():
            return jsonify({
                "error": "Rate limit exceeded",
                "wait_seconds": api_rate_limiter.get_wait_time()
            }), 429

        # Record request for rate limiting
        api_rate_limiter.record_request()

        # Get URLs from request
        data = request.get_json(silent=True)
        if not data or 'urls' not in data:
            return jsonify({"error": "Missing 'urls' in request"}), 400

        urls = data['urls']
        if not isinstance(urls, list) or not urls:
            return jsonify({"error": "'urls' must be a non-empty list"}), 400

        # Limit number of URLs to prevent abuse
        if len(urls) > 10:
            return jsonify({"error": "Maximum 10 URLs allowed per request"}), 400

        # Perform scraping
        scraped_results = scraper_service.scrape_multiple_urls(urls)

        # Filter out failed scraping results
        successful_results = [r for r in scraped_results if r.get('success', False)]
        if not successful_results:
            return jsonify({
                "error": "All scraping operations failed",
                "scraped_results": scraped_results
            }), 500

        # Consolidate data using LLM
        consolidated_result = llm_service.consolidate_data(successful_results)
        return jsonify({
            "consolidated_data": consolidated_result,
            "scraped_results": scraped_results
        })
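
    # Example request; the response pairs the LLM output with the raw per-URL
    # scrape results:
    #   POST /api/scrape-and-consolidate
    #   {"urls": ["https://example.com/a", "https://example.com/b"]}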

    @app.route('/api/summarize', methods=['POST'])
    def summarize_endpoint():
        """Endpoint to summarize content"""
        # Check rate limit
        if not api_rate_limiter.can_proceed():
            return jsonify({
                "error": "Rate limit exceeded",
                "wait_seconds": api_rate_limiter.get_wait_time()
            }), 429

        # Record request for rate limiting
        api_rate_limiter.record_request()

        # Get content from request
        data = request.get_json(silent=True)
        if not data or 'text' not in data:
            return jsonify({"error": "Missing 'text' in request"}), 400

        text = data['text']
        max_length = data.get('max_length', 500)

        # Summarize content using LLM
        summary = llm_service.summarize_content(text, max_length)
        return jsonify({"summary": summary})

    logger.info("API routes registered")