File size: 4,538 Bytes
6656c48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
from fastapi import FastAPI, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import StreamingResponse
from reportCleaning import process_pdf_to_chunks
from ragService import ingest_chunks, rag_query, delete_document_chunks
import json
from pathlib import Path
import logging
from typing import List, Optional


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# The ASGI application object; every route below is registered on it.
app = FastAPI(title="Python Hello World")

# CORS is left fully open (any origin, any method, any header) so that the
# MERN front end can call this service from the browser.
# NOTE(review): a wildcard origin is convenient for development; consider
# listing the real front-end origin(s) before exposing this publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

class Message(BaseModel):
    """Request body accepted by the ``/hello`` endpoint."""

    # Free-form text; ``/hello`` echoes it back under the ``you_sent`` key.
    message: str

class IngestRequest(BaseModel):
    """Request body accepted by the ``/ingest-chunks`` endpoint."""

    # Identifier handed to ``ingest_chunks`` together with the chunks.
    document_id: str
    # Chunk records to store; only "list of dicts" is validated here.
    # NOTE(review): presumably the JSONL records produced by /upload-pdf —
    # confirm the expected keys against ragService.
    chunks: List[dict]

class ChatMessage(BaseModel):
    """One entry of the ``chat_history`` list sent to ``/chat``."""

    # Speaker label for this entry.
    # NOTE(review): presumably values such as "user"/"assistant" — confirm
    # which roles ragService actually understands.
    role: str
    # Text of the message.
    content: str

class ChatRequest(BaseModel):
    """Request body accepted by the ``/chat`` endpoint."""

    # Question text; ``/chat`` answers 400 when it is empty or whitespace.
    query: str
    # Identifier passed to ``rag_query`` alongside the query.
    document_id: str
    # Earlier conversation entries. May be omitted or null; ``/chat`` treats
    # both the same as an empty history.
    chat_history: Optional[List[ChatMessage]] = []

class DeleteRequest(BaseModel):
    """Request body accepted by the ``DELETE /document-chunks`` endpoint."""

    # Identifier handed to ``delete_document_chunks``.
    document_id: str


@app.get("/")
def home():
    """Answer the root route with a fixed greeting."""
    greeting = "Hello World from Python Server!"
    return {"message": greeting}

@app.post("/hello")
def hello_world(data: Message):
    """Echo the posted message back inside a fixed success payload.

    Args:
        data: Parsed request body; its ``message`` is returned as ``you_sent``.

    Returns:
        A dict with the keys ``status``, ``python_says``, ``you_sent`` and
        ``note``, in that order.
    """
    reply = {"status": "success"}
    reply["python_says"] = "Hello World from Python"
    reply["you_sent"] = data.message
    reply["note"] = "Connection between MERN and Python is successful"
    return reply


def _chunks_content_disposition(filename: str) -> str:
    """Build a header-safe ``Content-Disposition`` value for the JSONL download.

    The download name is ``<stem of filename>_chunks.jsonl``. When that name
    is plain printable ASCII without quotes or backslashes, the classic
    ``attachment; filename="..."`` form is returned unchanged. Otherwise an
    ASCII fallback is emitted together with a percent-encoded UTF-8
    ``filename*`` parameter, so names with non-ASCII characters or quotes no
    longer break the header.

    Args:
        filename: The uploaded file's name as sent by the client.

    Returns:
        The complete header value.
    """
    # Imported locally so this block does not depend on a file-level import.
    from urllib.parse import quote

    download_name = f"{Path(filename).stem}_chunks.jsonl"

    # The name is user-controlled and ends up inside a quoted header string:
    # keep only characters that are always safe there.
    fallback = "".join(
        ch
        for ch in download_name
        if ch.isascii() and ch.isprintable() and ch not in '"\\'
    )
    if fallback == download_name:
        return f'attachment; filename="{download_name}"'

    encoded = quote(download_name, safe="")
    return f'attachment; filename="{fallback}"; ' + f"filename*=UTF-8''{encoded}"


@app.post("/upload-pdf")
async def receive_pdf(file: UploadFile = File(...)):
    """Convert an uploaded PDF into chunks and stream them back as JSONL.

    Args:
        file: The uploaded file; its name must end in ``.pdf``
            (case-insensitive).

    Returns:
        A ``StreamingResponse`` with one JSON document per line (one per
        chunk returned by ``process_pdf_to_chunks``), served as an attachment
        and carrying ``X-Chunks-Count`` and ``X-Processing-Status`` headers.

    Raises:
        HTTPException: 400 when the upload has no filename or is not a
            ``.pdf``; 500 (with the exception text as detail) when processing
            fails.
    """
    # An upload may arrive without a filename; treat that like any other
    # non-PDF upload instead of failing on ``None.lower()``.
    filename = file.filename or ""
    logger.info("Received %s in python", filename)

    if not filename.lower().endswith(".pdf"):
        raise HTTPException(status_code=400, detail="Only PDF files are allowed")

    file_bytes = await file.read()

    try:
        logger.info(
            "Starting processing: %s | Size: %.1f KB",
            filename,
            len(file_bytes) / 1024,
        )

        rag_chunks = process_pdf_to_chunks(file_bytes, filename)

        logger.info("Successfully generated %d chunks", len(rag_chunks))

        def generate_jsonl():
            # NOTE: this runs lazily while the response is being sent, so a
            # serialization error here is not seen by the ``except`` below.
            for chunk in rag_chunks:
                yield json.dumps(chunk, ensure_ascii=False) + "\n"

        response = StreamingResponse(
            generate_jsonl(),
            media_type="application/jsonl",
            headers={"Content-Disposition": _chunks_content_disposition(filename)},
        )

        # Extra headers let the caller verify the result without parsing
        # the body.
        response.headers["X-Chunks-Count"] = str(len(rag_chunks))
        response.headers["X-Processing-Status"] = "success"

        return response

    except Exception as e:
        logger.exception("Failed to process %s: %s", filename, e)
        raise HTTPException(status_code=500, detail=str(e)) from e


@app.post("/ingest-chunks")
async def ingest_chunks_endpoint(req: IngestRequest):
    """Hand the posted chunks of one document to ``ingest_chunks``.

    Args:
        req: Parsed request body with ``document_id`` and ``chunks``.

    Returns:
        A success payload carrying the value returned by ``ingest_chunks``
        under ``chunks_stored`` and the document id.

    Raises:
        HTTPException: 500, with the exception text as detail, when anything
            goes wrong; the failure is logged with its traceback.
    """
    doc_id = req.document_id

    try:
        logger.info(f"Ingesting {len(req.chunks)} chunks for document {doc_id}")
        stored = ingest_chunks(req.chunks, doc_id)
    except Exception as exc:
        reason = str(exc)
        logger.exception(f"Ingest failed for {doc_id}: {reason}")
        raise HTTPException(status_code=500, detail=reason)
    else:
        return {
            "status": "success",
            "chunks_stored": stored,
            "document_id": doc_id,
        }


@app.post("/chat")
async def chat_endpoint(req: ChatRequest):
    """Answer a question about one document through ``rag_query``.

    Args:
        req: Parsed request body with the query, the document id and an
            optional chat history.

    Returns:
        A success payload with the ``answer``, ``sources`` and
        ``chunks_used`` entries of the ``rag_query`` result plus the
        document id.

    Raises:
        HTTPException: 400 for an empty/whitespace query or when a
            ``ValueError`` is raised while answering; 500 (logged with
            traceback) for any other failure.
    """
    if req.query.strip() == "":
        raise HTTPException(status_code=400, detail="Query cannot be empty")

    doc_id = req.document_id

    try:
        # Flatten the pydantic messages into plain dicts for rag_query.
        history = []
        for entry in req.chat_history or []:
            history.append({"role": entry.role, "content": entry.content})

        result = rag_query(req.query, doc_id, history)

        payload = {"status": "success"}
        for key in ("answer", "sources", "chunks_used"):
            payload[key] = result[key]
        payload["document_id"] = doc_id
        return payload

    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    except Exception as exc:
        reason = str(exc)
        logger.exception(f"Chat failed for {doc_id}: {reason}")
        raise HTTPException(status_code=500, detail=reason)


@app.delete("/document-chunks")
async def delete_chunks(req: DeleteRequest):
    """Remove a document's chunks through ``delete_document_chunks``.

    Args:
        req: Parsed request body carrying the ``document_id``.

    Returns:
        A success payload with the value returned by
        ``delete_document_chunks`` under ``deleted`` and the document id.

    Raises:
        HTTPException: 500, with the exception text as detail, when the
            deletion fails; the failure is logged with its traceback.
    """
    doc_id = req.document_id

    try:
        removed = delete_document_chunks(doc_id)
    except Exception as exc:
        reason = str(exc)
        logger.exception(f"Delete failed for {doc_id}: {reason}")
        raise HTTPException(status_code=500, detail=reason)
    else:
        return {
            "status": "success",
            "deleted": removed,
            "document_id": doc_id,
        }