"""
Pydantic models for the scraper API.
"""
|
|
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field
|
|
|
|
class TaskInput(BaseModel):
    """Task payload: title, body, source URL, optional type, images and variants."""

    # Required fields: validation fails when any of these is missing.
    title: str = Field(
        ...,
        description="Task title",
    )
    content: str = Field(
        ...,
        description="Task body",
    )
    source_url: str = Field(
        ...,
        description="Source URL",
    )

    # Optional classification; stays None when not supplied.
    task_type: Optional[str] = Field(
        default=None,
        description="Task type",
    )

    # Both lists default to a fresh empty list per instance when omitted;
    # the Optional annotation additionally lets a caller pass None explicitly.
    images: Optional[List[str]] = Field(
        default_factory=list,
        description="Task images",
    )
    variants: Optional[List[str]] = Field(
        default_factory=list,
        description="Answer variants",
    )
|
|
|
|
class TaskResponse(BaseModel):
    """Task representation returned by the API; only title, content and source_url are required."""

    # Identifier; None when no id is available.
    id: Optional[int] = None

    # Required text fields.
    title: str
    content: str
    source_url: str

    # Optional descriptive fields; each one defaults to None when absent.
    task_type: Optional[str] = None
    images: Optional[List[str]] = None
    variants: Optional[List[str]] = None
    task_number: Optional[int] = None
    source_kind: Optional[str] = None
    # NOTE(review): presumably an identifier assigned by the source site -- confirm.
    task_guid: Optional[str] = None

    # False unless explicitly set.
    can_check_answer: bool = False

    scraped_at: Optional[datetime] = None

    # Free-form mapping with string keys and arbitrary values.
    # NOTE(review): presumably the output of a RuBERT-based analysis step -- confirm.
    rubert_analysis: Optional[Dict[str, Any]] = None
|
|
|
|
class ScrapeRequest(BaseModel):
    """Scrape parameters: subject code, explicit URLs, search query and a full-refresh flag."""

    # Falls back to "russian" when the caller does not name a subject.
    subject: Optional[str] = Field(
        default="russian",
        description="Subject code",
    )

    # A fresh empty list per instance when omitted; None may be passed explicitly.
    urls: Optional[List[str]] = Field(
        default_factory=list,
        description="Explicit URLs to scrape",
    )

    query: Optional[str] = Field(
        default=None,
        description="Search query",
    )

    # Off by default.
    full_refresh: bool = Field(
        default=False,
        description="When true, also scrape official archives and open-variant PDFs",
    )
|
|
|
|
class ScrapeResponse(BaseModel):
    """Scrape result: success flag, three task counters and a message."""

    # Every field is required; none has a default.
    success: bool

    # Counters reported for the run.
    tasks_scraped: int
    tasks_saved: int
    duplicates_skipped: int

    message: str
|
|
|
|
class CheckAnswerRequest(BaseModel):
    """Answer-check payload carrying a single submitted answer."""

    # Required; min_length=1 makes validation reject an empty string.
    answer: str = Field(
        ...,
        min_length=1,
        description="Submitted answer",
    )
|
|
|
|
class CheckAnswerResponse(BaseModel):
    """Answer-check result: flags, status code and label, both answer forms and a message."""

    # Every field is required; none has a default.
    success: bool
    is_correct: bool

    # NOTE(review): presumably a machine-readable code and its display label -- confirm.
    status_code: str
    status_label: str

    # The answer in two forms: as submitted, and normalized.
    submitted_answer: str
    normalized_answer: str

    message: str
|
|
|
|
class AnalysisRequest(BaseModel):
    """Analysis payload carrying the text to analyze."""

    # Required; no length constraint is declared here.
    text: str = Field(
        ...,
        description="Text to analyze",
    )
|
|
|
|
class AnalysisResponse(BaseModel):
    """Analysis result: category, keywords, confidence and an optional embedding."""

    # Required fields.
    category: str
    keywords: List[str]
    # No range constraint is declared on this value.
    confidence: float

    # Optional vector of floats; None when not provided.
    embedding: Optional[List[float]] = None
|
|
|
|
class HealthResponse(BaseModel):
    """Health report: status string, timestamp and a per-service boolean map."""

    # Every field is required; the caller supplies the timestamp.
    status: str
    timestamp: datetime

    # NOTE(review): presumably service name -> availability flag -- confirm.
    services: Dict[str, bool]
|
|
|
|
class StatsResponse(BaseModel):
    """Statistics: total task count, per-key counts and the optional last scrape time."""

    total_tasks: int

    # NOTE(review): presumably task type -> number of tasks -- confirm.
    by_type: Dict[str, int]

    # None when no value is provided.
    last_scrape: Optional[datetime] = None
|
|
|
|
class ErrorResponse(BaseModel):
    """Error payload: error text, optional detail and a creation timestamp."""

    # Required error text.
    error: str

    # Optional extra information; None when not provided.
    detail: Optional[str] = None

    # Filled in at instantiation time when the caller does not pass a value.
    # ``datetime.utcnow`` is deprecated since Python 3.12 and returns a naive
    # datetime that consumers may misread as local time, so the default is a
    # timezone-aware UTC datetime instead.
    timestamp: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
    )
|
|