File size: 2,808 Bytes
ba907cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
"""
Pydantic models and data structures for PDF Insight Beta application.

This module defines all the data models used throughout the application.
"""

from typing import List, Dict, Any, Optional
from pydantic import BaseModel, Field


class ChatRequest(BaseModel):
    """Request model for chat endpoint."""
    # Required: ``Field(...)`` declares no default, so the caller must send it.
    session_id: str = Field(..., description="Session identifier")
    # Required: the query text submitted by the user.
    query: str = Field(..., description="User query")
    # Optional flag; defaults to False when the caller omits it.
    use_search: bool = Field(default=False, description="Whether to use web search")
    # Optional; falls back to the model identifier below when omitted.
    # NOTE(review): under Pydantic v2 a field named ``model_*`` collides with
    # the protected "model_" namespace and emits a warning — confirm which
    # Pydantic major version this project pins.
    model_name: str = Field(
        default="meta-llama/llama-4-scout-17b-16e-instruct",
        description="LLM model to use"
    )


class SessionRequest(BaseModel):
    """Request model for session-related endpoints."""
    # The only field; required because ``Field(...)`` supplies no default.
    session_id: str = Field(..., description="Session identifier")


class UploadResponse(BaseModel):
    """Response model for PDF upload."""
    # All three fields are required strings; no defaults are declared.
    status: str
    session_id: str
    message: str


class ChatResponse(BaseModel):
    """Response model for chat endpoint."""
    # All fields are required; no defaults are declared.
    status: str
    answer: str
    # Loosely typed list of dictionaries with string keys.
    # NOTE(review): presumably each entry has the text/score/metadata shape
    # declared by ContextChunk in this module — confirm against the producer.
    context_used: List[Dict[str, Any]]


class ChatHistoryResponse(BaseModel):
    """Response model for chat history endpoint."""
    # Both fields are required; no defaults are declared.
    status: str
    # List of string-to-string dictionaries.
    # NOTE(review): presumably the user/assistant pairs modelled by
    # ChatHistoryEntry in this module — confirm against the caller.
    history: List[Dict[str, str]]


class StatusResponse(BaseModel):
    """Generic status response model."""
    # Both fields are required strings; no defaults are declared.
    status: str
    message: str


class ErrorResponse(BaseModel):
    """Error response model."""
    # ``status`` and ``detail`` are required strings; no defaults are declared.
    status: str
    detail: str
    type: Optional[str] = None


class ModelInfo(BaseModel):
    """Model information."""
    # Both fields are required strings; no defaults are declared.
    id: str
    name: str


class ModelsResponse(BaseModel):
    """Response model for models endpoint."""
    # Required list of ModelInfo entries (id/name pairs).
    models: List[ModelInfo]


class ChunkMetadata(BaseModel):
    """Metadata for document chunks."""
    # Both declared fields are optional and default to None.
    source: Optional[str] = None
    page: Optional[int] = None
    
    class Config:
        # With extra="allow", keys other than ``source`` and ``page`` are
        # accepted at construction time and retained on the instance.
        extra = "allow"  # Allow additional metadata fields


class DocumentChunk(BaseModel):
    """Document chunk with text and metadata."""
    # Both fields are required; ``metadata`` is validated as a ChunkMetadata.
    text: str
    metadata: ChunkMetadata

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary format used in processing.

        Returns:
            A dictionary with two keys: ``"text"`` (the chunk text) and
            ``"metadata"`` (the metadata model dumped to a plain dict).
        """
        meta = self.metadata
        # Pydantic v2 renamed ``.dict()`` to ``.model_dump()`` and deprecated
        # the old name. Prefer the new method when the installed Pydantic
        # provides it and fall back to ``.dict()`` on v1. The lookup goes
        # through the class so that an extra metadata field that happens to be
        # called "model_dump" cannot shadow the method.
        if hasattr(type(meta), "model_dump"):
            metadata = meta.model_dump()
        else:
            metadata = meta.dict()
        return {
            "text": self.text,
            "metadata": metadata,
        }


class SessionData(BaseModel):
    """Session data structure."""
    # ``file_path``, ``file_name`` and ``chunks`` are required.
    file_path: str
    file_name: str
    chunks: List[Dict[str, Any]]  # List of chunk dictionaries
    # ``default_factory=list`` gives every instance its own fresh empty list
    # when no history is supplied.
    chat_history: List[Dict[str, str]] = Field(default_factory=list)
    
    class Config:
        # NOTE(review): no field declared on this model uses a non-Pydantic
        # type, so this setting currently has nothing to act on here;
        # presumably kept for a FAISS index stored with the session — confirm.
        arbitrary_types_allowed = True  # Allow non-Pydantic types like FAISS index


class ChatHistoryEntry(BaseModel):
    """Single chat history entry."""
    # Both fields are required strings; no defaults are declared.
    user: str
    assistant: str


class ContextChunk(BaseModel):
    """Context chunk with similarity score."""
    # All three fields are required; no defaults are declared.
    text: str
    # NOTE(review): the scale and direction of the score (higher vs. lower is
    # more similar) are not visible in this module — confirm with the producer.
    score: float
    # Free-form dictionary with string keys.
    metadata: Dict[str, Any]