Spaces:
Paused
Paused
File size: 4,199 Bytes
32562a3 751c042 3878f81 b34b7d7 0770449 b34b7d7 0770449 b34b7d7 443105b 0770449 751c042 0770449 32562a3 2c3245b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
from fastapi import FastAPI, HTTPException
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
import pickle
import uvicorn
import logging
import os
import shutil
import subprocess
import torch
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceInstructEmbeddings
# from langchain.embeddings import HuggingFaceEmbeddings
from run_localGPT import load_model
from prompt_template_utils import get_prompt_template
# from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.vectorstores import Chroma
from werkzeug.utils import secure_filename
from constants import CHROMA_SETTINGS, EMBEDDING_MODEL_NAME, PERSIST_DIRECTORY, MODEL_ID, MODEL_BASENAME
if torch.backends.mps.is_available():
DEVICE_TYPE = "mps"
elif torch.cuda.is_available():
DEVICE_TYPE = "cuda"
else:
DEVICE_TYPE = "cpu"
SHOW_SOURCES = True
logging.info(f"Running on: {DEVICE_TYPE}")
logging.info(f"Display Source Documents set to: {SHOW_SOURCES}")
EMBEDDINGS = HuggingFaceInstructEmbeddings(model_name=EMBEDDING_MODEL_NAME, model_kwargs={"device": DEVICE_TYPE})
# load the vectorstore
DB = Chroma(
persist_directory=PERSIST_DIRECTORY,
embedding_function=EMBEDDINGS,
client_settings=CHROMA_SETTINGS,
)
RETRIEVER = DB.as_retriever()
LLM = load_model(device_type=DEVICE_TYPE, model_id=MODEL_ID, model_basename=MODEL_BASENAME)
prompt, memory = get_prompt_template(promptTemplate_type="llama", history=False)
QA = RetrievalQA.from_chain_type(
llm=LLM,
chain_type="stuff",
retriever=RETRIEVER,
return_source_documents=SHOW_SOURCES,
chain_type_kwargs={
"prompt": prompt,
},
)
class Predict(BaseModel):
prompt: str
app = FastAPI()
@app.get("/")
def root():
return {"API": "An API for Sepsis Prediction."}
app.mount("/static", StaticFiles(directory="static"), name="static")
@app.post('/predict')
async def predict(data: Predict):
user_prompt = data.prompt
if user_prompt:
# print(f'User Prompt: {user_prompt}')
# Get the answer from the chain
res = QA(user_prompt)
answer, docs = res["result"], res["source_documents"]
prompt_response_dict = {
"Prompt": user_prompt,
"Answer": answer,
}
prompt_response_dict["Sources"] = []
for document in docs:
prompt_response_dict["Sources"].append(
(os.path.basename(str(document.metadata["source"])), str(document.page_content))
)
return prompt_response_dict
else:
raise HTTPException(status_code=400, detail="Prompt Incorrect")
@app.get("/run_ingest")
def run_ingest_route():
try:
if os.path.exists(PERSIST_DIRECTORY):
try:
shutil.rmtree(PERSIST_DIRECTORY)
except OSError as e:
print(f"Error: {e.filename} - {e.strerror}.")
else:
print("The directory does not exist")
run_langest_commands = ["python", "ingest.py"]
if DEVICE_TYPE == "cpu":
run_langest_commands.append("--device_type")
run_langest_commands.append(DEVICE_TYPE)
result = subprocess.run(run_langest_commands, capture_output=True)
if result.returncode != 0:
raise HTTPException(status_code=400, detail="Script execution failed: {}")
# load the vectorstore
DB = Chroma(
persist_directory=PERSIST_DIRECTORY,
embedding_function=EMBEDDINGS,
client_settings=CHROMA_SETTINGS,
)
RETRIEVER = DB.as_retriever()
prompt, memory = get_prompt_template(promptTemplate_type="llama", history=False)
QA = RetrievalQA.from_chain_type(
llm=LLM,
chain_type="stuff",
retriever=RETRIEVER,
return_source_documents=SHOW_SOURCES,
chain_type_kwargs={
"prompt": prompt,
},
)
return "Script executed successfully: {}".format(result.stdout.decode("utf-8"))
except Exception as e:
raise HTTPException(status_code=500, detail=f"Error occurred: {str(e)}")
|