|
from fastapi import APIRouter, UploadFile, File |
|
from fastapi.responses import JSONResponse |
|
import openai |
|
import io |
|
import os |
|
from pypdf import PdfReader |
|
from langchain.embeddings.openai import OpenAIEmbeddings |
|
from langchain.vectorstores import Qdrant |
|
from langchain.schema import Document |
|
from langchain.chains.question_answering import load_qa_chain |
|
from langchain.llms import OpenAI |
|
from db import vector_store |
|
|
|
router = APIRouter() |
|
_db = vector_store.get_instance() |
|
_chain = load_qa_chain(OpenAI(temperature=0), chain_type="map_reduce") |
|
|
|
@router.post("/v1/docs") |
|
async def index_doc(file: UploadFile = File(...)): |
|
async for doc in generate_documents(file): |
|
_db.add_documents([doc]) |
|
|
|
return JSONResponse(status_code=200, content={}) |
|
|
|
@router.get("/v1/docs") |
|
async def search(query: str): |
|
print(query) |
|
docs = _db.similarity_search(query=query) |
|
print(docs) |
|
answer = _chain.run(input_documents=docs, question=query) |
|
return JSONResponse(status_code=200, content={"answer": answer}) |
|
|
|
async def generate_documents(file: UploadFile): |
|
num=0 |
|
async for txt in convert_documents(file): |
|
num += 1 |
|
document = Document(page_content=txt,metadata={"page": num}) |
|
yield document |
|
|
|
async def convert_documents(file: UploadFile): |
|
|
|
if file.content_type == 'application/pdf': |
|
content = await file.read() |
|
pdf_reader = PdfReader(io.BytesIO(content)) |
|
try: |
|
for page in pdf_reader.pages: |
|
yield page.extract_text() |
|
except Exception as e: |
|
print(f"Exception {e}") |
|
else: |
|
return |