rikki809 committed on
Commit
b7ffe78
·
1 Parent(s): f5c63d8

first commit

Browse files
Dockerfile ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ WORKDIR /app
4
+
5
+ RUN apt-get update && apt-get install -y \
6
+ build-essential \
7
+ libglib2.0-0 \
8
+ libsm6 \
9
+ libxrender1 \
10
+ libxext6 \
11
+ poppler-utils \
12
+ && rm -rf /var/lib/apt/lists/*
13
+
14
+ COPY requirements.txt .
15
+ RUN pip install --no-cache-dir -r requirements.txt
16
+
17
+ COPY . .
18
+
19
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
app/core/config.py ADDED
@@ -0,0 +1 @@
 
 
1
+ URL = "*"
app/main.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from app.routes.qa import router as qa_router
4
+ from dotenv import load_dotenv
5
+ from app.core.config import URL
6
+
7
# Load variables from a local .env file into the process environment.
load_dotenv()

app = FastAPI()

# FastAPI's CORS documentation states that allow_origins must not be ["*"]
# when allow_credentials is True, so credentialed requests are only enabled
# when URL names an explicit origin rather than the "*" wildcard.
app.add_middleware(
    CORSMiddleware,
    allow_origins=[URL],
    allow_credentials=URL != "*",
    allow_methods=["*"],
    allow_headers=["*"],
)

# Mount the question-answering routes under the /api prefix.
app.include_router(qa_router, prefix="/api")
app/models/embeddings.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer
2
+ import faiss
3
+
4
+ model = SentenceTransformer('all-MiniLM-L6-v2')
5
+
6
def embed_and_index_chunks(chunks):
    """Embed text chunks and load them into a flat L2 FAISS index.

    Args:
        chunks: Non-empty sequence of strings to embed.

    Returns:
        A tuple ``(index, embeddings)``: the ``faiss.IndexFlatL2`` holding the
        vectors and the array returned by ``model.encode``.

    Raises:
        ValueError: If ``chunks`` is empty. Without this guard the failure
            would surface later as an opaque error when reading
            ``embeddings.shape[1]``.
    """
    if not chunks:
        raise ValueError("Cannot build an index from an empty list of chunks")

    embeddings = model.encode(chunks, convert_to_numpy=True)
    # The index dimensionality is taken from the width of the embedding matrix.
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index, embeddings
app/routes/qa.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, UploadFile, File, Form
2
+ import tempfile
3
+ import shutil
4
+ from app.models.embeddings import embed_and_index_chunks, model
5
+ from app.utils.file_parser import extract_text
6
+ from app.utils.chunking import chunk_text
7
+ import numpy as np
8
+ import os
9
+ import google.generativeai as genai
10
+ from dotenv import load_dotenv
11
+
12
+ load_dotenv()
13
+
14
+ router = APIRouter()
15
+
16
+ genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
17
+ gemini = genai.GenerativeModel("gemini-2.5-pro")
18
+
19
@router.post("/ask")
async def ask_doc(file: UploadFile = File(...), query: str = Form(...)):
    """Answer a query about an uploaded document using retrieved context.

    The upload is copied to a temporary file, converted to text, chunked and
    indexed. The chunks nearest to the query embedding are placed in a prompt
    that is sent to the Gemini model.

    Args:
        file: The uploaded document; parsing is delegated to ``extract_text``.
        query: The user's question, submitted as a form field.

    Returns:
        A dict of the form ``{"result": <model response text>}``.

    Raises:
        HTTPException: Status 400 when ``extract_text`` rejects the file type
            or when the document yields no text to search.
    """
    # Function-scope import: the module-level fastapi import does not bring
    # HTTPException into scope.
    from fastapi import HTTPException

    # Lower-case the extension so an upload named "X.PDF" still matches
    # suffix checks downstream; a missing filename yields an empty suffix.
    suffix = os.path.splitext(file.filename or "")[1].lower()
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        shutil.copyfileobj(file.file, tmp)
        tmp_path = tmp.name

    try:
        text = extract_text(tmp_path)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    finally:
        # The file was created with delete=False, so it must be removed here
        # or every request leaves a file behind.
        os.remove(tmp_path)

    if not text.strip():
        raise HTTPException(
            status_code=400, detail="No extractable text found in document"
        )

    chunks = [chunk for chunk in chunk_text(text) if chunk.strip()]
    if not chunks:
        raise HTTPException(
            status_code=400, detail="No extractable text found in document"
        )

    index, _ = embed_and_index_chunks(chunks)

    # NOTE(review): encode() and generate_content() are synchronous calls made
    # from an async handler; consider moving them off the event loop.
    query_embedding = model.encode([query], convert_to_numpy=True)

    # Never request more neighbours than there are chunks: FAISS pads missing
    # results with -1, which would otherwise index chunks[-1] by accident.
    top_k = min(3, len(chunks))
    _, neighbours = index.search(query_embedding, k=top_k)
    retrieved_chunks = [chunks[i] for i in neighbours[0] if i >= 0]
    context = "\n\n".join(retrieved_chunks)

    prompt = f"""
    User Query: {query}

    Relevant Clauses from Document:
    {context}

    Based on the above, return a JSON with:
    - decision (approved/rejected)
    - amount (if applicable)
    - justification with referenced clauses
    """

    response = gemini.generate_content(prompt)
    return {"result": response.text}
app/utils/chunking.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def chunk_text(text, chunk_size=300):
    """Split text into chunks of roughly ``chunk_size`` characters.

    The text is split on ``"."`` and the pieces are greedily packed into
    chunks, each piece getting its period back. A single piece longer than
    ``chunk_size`` is kept whole as its own chunk rather than being cut.

    Args:
        text: The text to split.
        chunk_size: Soft upper bound on the length of a chunk.

    Returns:
        A list of non-empty, whitespace-stripped chunk strings. Empty or
        whitespace-only input returns an empty list.
    """
    chunks = []
    current = ""
    for segment in text.split("."):
        # Blank pieces come from a trailing period, consecutive periods or
        # pure whitespace; keeping them would produce stray "." characters.
        if not segment.strip():
            continue
        # Only flush when something has accumulated, so an over-long first
        # sentence does not emit an empty chunk.
        if current and len(current) + len(segment) >= chunk_size:
            chunks.append(current.strip())
            current = segment + "."
        else:
            current += segment + "."
    if current:
        chunks.append(current.strip())
    return chunks
app/utils/file_parser.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import fitz # PyMuPDF
2
+ from docx import Document
3
+
4
def extract_text(file_path: str) -> str:
    """Return the text content of a PDF or DOCX file.

    The file type is decided by the path's extension, compared without regard
    to case. PDF pages, or DOCX paragraphs, are joined with newlines.

    Args:
        file_path: Path to a ``.pdf`` or ``.docx`` file.

    Returns:
        The extracted text.

    Raises:
        ValueError: If the extension is neither ``.pdf`` nor ``.docx``.
    """
    lowered = file_path.lower()
    if lowered.endswith(".pdf"):
        # The context manager closes the PyMuPDF document once the pages
        # have been read, releasing the underlying file handle.
        with fitz.open(file_path) as doc:
            return "\n".join(page.get_text() for page in doc)
    if lowered.endswith(".docx"):
        doc = Document(file_path)
        return "\n".join(p.text for p in doc.paragraphs)
    raise ValueError("Unsupported file type")
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ python-multipart
4
+ sentence-transformers==2.2.2
5
+ faiss-cpu
6
+ PyMuPDF
7
+ python-docx
8
+ python-dotenv
9
+ google-generativeai==0.8.5