Spaces:
Paused
Paused
File size: 6,162 Bytes
19e75c0 d5863e7 751c042 d5863e7 3878f81 b34b7d7 0770449 fe4b5f1 0770449 d5863e7 0770449 b34b7d7 0770449 d5863e7 fe4b5f1 0770449 fe4b5f1 0770449 fe4b5f1 0770449 8f1d4f2 6229292 b34b7d7 1016fdb 8f1d4f2 6229292 0770449 1016fdb 0770449 d13603d 0770449 32562a3 2c3245b 8f1d4f2 2c3245b d13603d 2c3245b d13603d 2c3245b 5f76223 2c3245b 5f76223 2c3245b d13603d 2c3245b eb7bd29 d0c75d3 d5863e7 d0c75d3 d5863e7 d0c75d3 d5863e7 d0c75d3 d5863e7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 |
from typing import Union
from fastapi import FastAPI, HTTPException, UploadFile, WebSocket
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
import pickle
import uvicorn
import logging
import os
import shutil
import subprocess
import torch
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.prompts import PromptTemplate
# from langchain.embeddings import HuggingFaceEmbeddings
from run_localGPT import load_model
from prompt_template_utils import get_prompt_template
# from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.vectorstores import Chroma
from werkzeug.utils import secure_filename
from constants import CHROMA_SETTINGS, EMBEDDING_MODEL_NAME, PERSIST_DIRECTORY, MODEL_ID, MODEL_BASENAME
# Device selection. The automatic detection below (MPS, then CUDA, then CPU)
# is disabled and the device is pinned to CUDA instead.
# NOTE(review): with detection commented out, this presumably fails on hosts
# without a CUDA device -- confirm before deploying elsewhere.
# if torch.backends.mps.is_available():
#     DEVICE_TYPE = "mps"
# elif torch.cuda.is_available():
#     DEVICE_TYPE = "cuda"
# else:
#     DEVICE_TYPE = "cpu"
DEVICE_TYPE = "cuda"
# Passed as return_source_documents when the QA chain is built.
SHOW_SOURCES = True
# Embedding model, instantiated on DEVICE_TYPE; shared by every Chroma instance
# created in this module.
EMBEDDINGS = HuggingFaceInstructEmbeddings(model_name=EMBEDDING_MODEL_NAME, model_kwargs={"device": DEVICE_TYPE})
# load the vectorstore
DB = Chroma(
    persist_directory=PERSIST_DIRECTORY,
    embedding_function=EMBEDDINGS,
    client_settings=CHROMA_SETTINGS,
)
# Retriever view over the vector store, handed to the QA chain.
RETRIEVER = DB.as_retriever()
# Language model loaded once at import time via the project's load_model helper.
LLM = load_model(device_type=DEVICE_TYPE, model_id=MODEL_ID, model_basename=MODEL_BASENAME)
# Prompt/memory pair from the project's prompt helper, requested for the
# "llama" template type with history disabled.
# NOTE(review): neither name is referenced by the module-level QA chain, which
# is built from QA_CHAIN_PROMPT -- confirm whether these are still needed.
prompt, memory = get_prompt_template(promptTemplate_type="llama", history=False)
# Custom instruction text for the QA chain. {context} and {question} are the
# placeholders that PromptTemplate.from_template turns into input variables.
template = """you are a helpful, respectful and honest assistant.
Your name is Katara llma. You should only use the source documents provided to answer the questions.
You should only respond only topics that contains in documents use to training.
Use the following pieces of context to answer the question at the end.
Always answer in the most helpful and safe way possible.
If you don't know the answer to a question, just say that you don't know, don't try to make up an answer, don't share false information.
Use 15 sentences maximum. Keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)
# Retrieval QA chain called by the /predict handler. It uses the "stuff" chain
# type with the custom QA_CHAIN_PROMPT, and returns source documents according
# to SHOW_SOURCES.
QA = RetrievalQA.from_chain_type(
    llm=LLM,
    retriever=RETRIEVER,
    chain_type="stuff",
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    return_source_documents=SHOW_SOURCES,
)
class Predict(BaseModel):
    # Request body of POST /predict.
    # `prompt` is the user's question; the handler rejects an empty string
    # with a 400 response.
    prompt: str
# Two applications: `app` is the root that serves the static front end, and
# `api_app` carries the JSON/WebSocket routes and is exposed under /api.
app = FastAPI(title="homepage-app")
api_app = FastAPI(title="api app")

# The API is mounted before the catch-all "/" static mount, presumably so that
# /api requests are not captured by the static file handler.
app.mount(path="/api", app=api_app, name="api")
app.mount(
    path="/",
    app=StaticFiles(directory="static", html=True),
    name="static",
)
@api_app.post('/predict')
async def predict(data: Predict):
    """Answer a user prompt with the module-level retrieval QA chain.

    Args:
        data: Request body carrying the user's ``prompt``.

    Returns:
        ``{"response": {...}}`` where the inner dict holds ``Prompt`` (the
        prompt as received), ``Answer`` (the chain's ``result``) and
        ``Sources``, a list of ``(source file name, page content)`` pairs.
        ``Sources`` is empty when the chain returns no source documents.

    Raises:
        HTTPException: 400 when the prompt is empty.
    """
    user_prompt = data.prompt
    if not user_prompt:
        raise HTTPException(status_code=400, detail="Prompt Incorrect")

    # Get the answer from the chain.
    # NOTE(review): this is a synchronous call inside an async handler, so the
    # event loop is blocked while the chain runs.
    res = QA(user_prompt)

    # "source_documents" is only requested when the chain is built with
    # return_source_documents=True, so do not assume the key is present.
    docs = res.get("source_documents", [])
    sources = [
        (os.path.basename(str(document.metadata["source"])), str(document.page_content))
        for document in docs
    ]

    return {
        "response": {
            "Prompt": user_prompt,
            "Answer": res["result"],
            "Sources": sources,
        }
    }
@api_app.get("/run_ingest")
def run_ingest_route():
    """Rebuild the vector store with ``ingest.py`` and refresh the QA chain.

    Steps: delete ``PERSIST_DIRECTORY``, run ``python ingest.py`` in a
    subprocess, then re-create the module-level ``DB``, ``RETRIEVER`` and
    ``QA`` objects so later ``/predict`` calls use the new index.

    Returns:
        ``{"response": "Script executed successfully: <ingest stdout>"}``.

    Raises:
        HTTPException: 500 when the persist directory is missing or cannot be
            removed, when the ingest script exits with a non-zero status
            (detail carries its stderr), or when any other error occurs.
    """
    # Without this declaration the assignments below would create locals and
    # the rebuilt chain would be thrown away when the function returns.
    global DB, RETRIEVER, QA

    try:
        if not os.path.exists(PERSIST_DIRECTORY):
            raise HTTPException(status_code=500, detail="The directory does not exist")
        try:
            shutil.rmtree(PERSIST_DIRECTORY)
        except OSError as e:
            raise HTTPException(
                status_code=500, detail=f"Error: {e.filename} - {e.strerror}."
            ) from e

        run_langest_commands = ["python", "ingest.py"]
        # The device flag is only forwarded for CPU; for any other device the
        # script is left on its own default.
        if DEVICE_TYPE == "cpu":
            run_langest_commands += ["--device_type", DEVICE_TYPE]

        result = subprocess.run(run_langest_commands, capture_output=True)
        if result.returncode != 0:
            # 500 matches what clients already received: the previous 400 was
            # always re-wrapped into a 500 by the generic handler below.
            raise HTTPException(
                status_code=500,
                detail="Script execution failed: {}".format(
                    result.stderr.decode("utf-8", errors="replace")
                ),
            )

        # load the vectorstore
        DB = Chroma(
            persist_directory=PERSIST_DIRECTORY,
            embedding_function=EMBEDDINGS,
            client_settings=CHROMA_SETTINGS,
        )
        RETRIEVER = DB.as_retriever()
        # Use the same prompt as the chain built at import time so answers do
        # not change style after a re-ingest.
        QA = RetrievalQA.from_chain_type(
            llm=LLM,
            chain_type="stuff",
            retriever=RETRIEVER,
            return_source_documents=SHOW_SOURCES,
            chain_type_kwargs={
                "prompt": QA_CHAIN_PROMPT,
            },
        )

        response = "Script executed successfully: {}".format(
            result.stdout.decode("utf-8", errors="replace")
        )
        return {"response": response}
    except HTTPException:
        # Deliberate HTTP errors raised above must reach the client unchanged.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error occurred: {str(e)}") from e
@api_app.post("/save_document/")
async def create_upload_file(file: Union[UploadFile, None] = None):
    """Store an uploaded file in the ``SOURCE_DOCUMENTS`` folder.

    Args:
        file: The multipart upload, or ``None`` when the request carried none.

    Returns:
        ``{"response": "File saved successfully"}`` once the file is written.

    Raises:
        HTTPException: 400 when no file was sent or its name is empty after
            sanitising; 500 when the file cannot be written to disk.
    """
    if file is None:
        raise HTTPException(status_code=400, detail="No upload file sent")

    # secure_filename() can reduce a name to "" (for example when it consists
    # only of path separators), so validate the sanitised result.
    filename = secure_filename(file.filename or "")
    if not filename:
        raise HTTPException(status_code=400, detail="No selected file")

    folder_path = "SOURCE_DOCUMENTS"
    file_path = os.path.join(folder_path, filename)
    try:
        os.makedirs(folder_path, exist_ok=True)
        # UploadFile has no save() method; copy from its underlying file
        # object instead.
        with open(file_path, "wb") as destination:
            shutil.copyfileobj(file.file, destination)
    except OSError as e:
        # detail must be JSON-serialisable, so send the message rather than
        # the exception object.
        raise HTTPException(status_code=500, detail=str(e)) from e

    return {"response": "File saved successfully"}
@api_app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    """Echo every text message received on the socket back to the client.

    Each incoming message is answered with ``"Message text was: <message>"``.
    The handler returns when the client disconnects.

    Args:
        websocket: The connection to accept and serve.
    """
    # Imported here because the module-level fastapi import does not include it.
    from fastapi import WebSocketDisconnect

    await websocket.accept()
    try:
        while True:
            data = await websocket.receive_text()
            await websocket.send_text(f"Message text was: {data}")
    except WebSocketDisconnect:
        # receive_text() raises this when the client closes the connection;
        # that is the normal way out of the loop, not an error.
        return
|