Spaces:
Paused
Paused
File size: 8,210 Bytes
0770449 9ef164b 0770449 99f6cbc c287747 07e217d c4cd469 c18ec7e d815dea c18ec7e 0770449 f2fde57 0770449 fe4b5f1 cb776ef 2a13ed4 c18ec7e 0770449 1d1dd8d 0770449 8c08762 0770449 c18ec7e d5863e7 0770449 b34b7d7 0770449 99f6cbc abff149 1ae7653 5068745 1ae7653 a6b827b d815dea 07e217d abff149 1ae7653 99f6cbc 0770449 d815dea 2a13ed4 fe4b5f1 cb776ef fe4b5f1 cb776ef 2a13ed4 fe4b5f1 0770449 fe4b5f1 0770449 44398ac 8f1d4f2 6229292 b34b7d7 1016fdb 8f1d4f2 9ef164b 2c3245b b1f4ef7 2c3245b d13603d 2c3245b d13603d 2c3245b b1f4ef7 2c3245b 5f76223 2c3245b 5f76223 2c3245b b1f4ef7 2c3245b b1f4ef7 cb776ef 2c3245b d13603d b606edb cb776ef 2c3245b ea4ffd3 9ef164b b1f4ef7 9ef164b b1f4ef7 ea4ffd3 9ef164b 2ea73cf b1f4ef7 9ef164b b1f4ef7 2ea73cf b1f4ef7 eb7bd29 177debf 8c08762 177debf f2fde57 177debf 8c08762 177debf d5863e7 abff149 ba93db8 d5863e7 ef75206 866cbc1 ff06cd6 d815dea 866cbc1 3c4d8f2 abff149 3bf68c9 866cbc1 225e078 25121ae d815dea ef75206 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 |
import os
import glob
import shutil
import subprocess
import asyncio
from typing import Any, Dict, List
from fastapi import FastAPI, HTTPException, UploadFile, WebSocket, WebSocketDisconnect
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
# import torch
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.callbacks.base import BaseCallbackHandler
from langchain.schema import LLMResult
# from langchain.embeddings import HuggingFaceEmbeddings
from load_models import load_model
# from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.vectorstores import Chroma
from constants import CHROMA_SETTINGS, EMBEDDING_MODEL_NAME, PERSIST_DIRECTORY, MODEL_ID, MODEL_BASENAME, PATH_NAME_SOURCE_DIRECTORY
class Predict(BaseModel):
    """Request body of the ``/predict`` endpoint: the user's question."""

    prompt: str
class Delete(BaseModel):
    """Request body of the delete-document endpoint: the file to remove."""

    filename: str
# Device selection. Automatic detection through torch is disabled (kept here
# for reference); the device is pinned to CUDA instead.
# if torch.backends.mps.is_available():
#     DEVICE_TYPE = "mps"
# elif torch.cuda.is_available():
#     DEVICE_TYPE = "cuda"
# else:
#     DEVICE_TYPE = "cpu"
DEVICE_TYPE = "cuda"

# Passed to the QA chain as ``return_source_documents``.
SHOW_SOURCES = True

# Embedding model used by the vector store, created on DEVICE_TYPE.
EMBEDDINGS = HuggingFaceInstructEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    model_kwargs={"device": DEVICE_TYPE},
)

# Open the Chroma vector store located in PERSIST_DIRECTORY and expose it as
# a retriever for the QA chain.
DB = Chroma(
    persist_directory=PERSIST_DIRECTORY,
    embedding_function=EMBEDDINGS,
    client_settings=CHROMA_SETTINGS,
)
RETRIEVER = DB.as_retriever()
class MyCustomSyncHandler(BaseCallbackHandler):
    """LLM callback handler that records whether a generation has finished.

    ``end`` is ``False`` from construction and from every ``on_llm_start``
    until ``on_llm_end`` sets it to ``True``.
    """

    def __init__(self):
        self.end = False

    def on_llm_start(
        self,
        serialized: Dict[str, Any],
        prompts: List[str],
        **kwargs: Any,
    ) -> None:
        """Flag the generation as running."""
        self.end = False

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        """Flag the generation as finished."""
        self.end = True

    def on_llm_new_token(self, token: str, **kwargs) -> Any:
        """Print the handler and the callback kwargs for each new token.

        Debug output only: ``token`` itself is neither printed nor forwarded.
        """
        for item in (self, kwargs):
            print(item)
# Shared state: one callback handler instance is registered with the LLM at
# load time; the websocket endpoint later stores its send function on it.
handlerToken = MyCustomSyncHandler()

LLM = load_model(
    device_type=DEVICE_TYPE,
    model_id=MODEL_ID,
    model_basename=MODEL_BASENAME,
    stream=True,
    callbacks=[handlerToken],
)

# Prompt for the "stuff" chain; {context} and {question} are the only
# placeholders.
template = """You are a helpful, respectful and honest assistant.
Always answer in the most helpful and safe way possible without trying to make up an answer, if you don't know the answer just say "I don't know" and don't share false information or topics that were not provided in your training. Use a maximum of 15 sentences. Your answer should be as concise and clear as possible. Always say "thank you for asking!" at the end of your answer.
Context: {context}
Question: {question}
"""

# Conversation memory. It is not attached to the chain built here; only the
# chain rebuilt by the training route receives it.
memory = ConversationBufferMemory(input_key="question", memory_key="history")

QA_CHAIN_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=template,
)

# Retrieval QA chain used by the HTTP and websocket endpoints.
QA = RetrievalQA.from_chain_type(
    llm=LLM,
    chain_type="stuff",
    retriever=RETRIEVER,
    return_source_documents=SHOW_SOURCES,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)

# Two applications: the API is mounted under /api, and the static front end
# is mounted at the root afterwards so it does not shadow the API routes.
app = FastAPI(title="homepage-app")
api_app = FastAPI(title="api app")

app.mount("/api", api_app, name="api")
app.mount("/", StaticFiles(directory="static", html=True), name="static")
@api_app.get("/training")
def run_ingest_route():
    """Rebuild the vector store from scratch and reload the QA chain.

    Deletes PERSIST_DIRECTORY, runs ``ingest.py`` as a subprocess, then
    re-creates the module-level ``DB``, ``RETRIEVER`` and ``QA`` objects so
    later requests use the freshly ingested data.

    Returns:
        A dict with a ``"response"`` success message.

    Raises:
        HTTPException: 500 if the persist directory is missing or cannot be
            removed, 400 if ``ingest.py`` exits with a non-zero status (the
            detail carries its stderr), 500 for any other unexpected error.
    """
    global DB
    global RETRIEVER
    global QA
    try:
        if not os.path.exists(PERSIST_DIRECTORY):
            raise HTTPException(status_code=500, detail="The directory does not exist")
        try:
            shutil.rmtree(PERSIST_DIRECTORY)
        except OSError as e:
            raise HTTPException(status_code=500, detail=f"Error: {e.filename} - {e.strerror}.")

        run_langest_commands = ["python", "ingest.py"]
        # The device flag is only passed explicitly for CPU; otherwise
        # ingest.py runs with its own default.
        if DEVICE_TYPE == "cpu":
            run_langest_commands.extend(["--device_type", DEVICE_TYPE])

        result = subprocess.run(run_langest_commands, capture_output=True)
        if result.returncode != 0:
            # Surface the script's stderr so the caller can see why it failed.
            stderr_text = result.stderr.decode(errors="replace").strip()
            raise HTTPException(
                status_code=400,
                detail=f"Script execution failed: {stderr_text}",
            )

        # load the vectorstore
        DB = Chroma(
            persist_directory=PERSIST_DIRECTORY,
            embedding_function=EMBEDDINGS,
            client_settings=CHROMA_SETTINGS,
        )
        RETRIEVER = DB.as_retriever()
        # NOTE(review): unlike the chain built at import time, this one is
        # given `memory`, although the prompt has no history placeholder --
        # confirm which of the two configurations is intended.
        QA = RetrievalQA.from_chain_type(
            llm=LLM,
            chain_type="stuff",
            retriever=RETRIEVER,
            return_source_documents=SHOW_SOURCES,
            chain_type_kwargs={
                "prompt": QA_CHAIN_PROMPT,
                "memory": memory,
            },
        )
        return {"response": "The training was successfully completed"}
    except HTTPException:
        # Deliberate HTTP errors raised above must keep their status code and
        # detail instead of being rewrapped as a generic 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error occurred: {str(e)}") from e
@api_app.get("/api/files")
def get_files():
    """Return the source-documents directory and the paths of its entries.

    The directory is PATH_NAME_SOURCE_DIRECTORY resolved against the current
    working directory; entries are matched with a single-level ``*`` glob.
    """
    upload_dir = os.path.join(os.getcwd(), PATH_NAME_SOURCE_DIRECTORY)
    pattern = os.path.join(upload_dir, '*')
    matches = glob.glob(pattern)
    return {"directory": upload_dir, "files": matches}
@api_app.delete("/api/delete_document")
def delete_source_route(data: Delete):
    """Delete one file from the source-documents directory.

    Args:
        data: Request body whose ``filename`` is a bare file name inside
            PATH_NAME_SOURCE_DIRECTORY (no directory components).

    Returns:
        A dict with a ``"message"`` confirming the deletion.

    Raises:
        HTTPException: 400 if the name is empty or contains path components,
            if the file does not exist, or if removing it fails.
    """
    filename = data.filename
    # The name comes from the client: refuse anything that could point
    # outside the source-documents directory (e.g. "../app.py").
    if not filename or filename in (".", "..") or os.path.basename(filename) != filename:
        raise HTTPException(status_code=400, detail="Invalid filename")

    path_source_documents = os.path.join(os.getcwd(), PATH_NAME_SOURCE_DIRECTORY)
    file_to_delete = f"{path_source_documents}/{filename}"

    if not os.path.exists(file_to_delete):
        message = f"The file {file_to_delete} does not exist."
        print(message)
        raise HTTPException(status_code=400, detail=message)

    try:
        os.remove(file_to_delete)
    except OSError as e:
        message = f"error: {e}."
        print(message)
        raise HTTPException(status_code=400, detail=message) from e

    message = f"{file_to_delete} has been deleted."
    print(message)
    return {"message": message}
@api_app.post('/predict')
async def predict(data: Predict):
    """Answer a prompt with the QA chain and report the retrieved sources.

    Returns a dict whose ``"response"`` holds the prompt, the answer and a
    list of ``(source file name, page content)`` pairs. An empty prompt is
    rejected with HTTP 400.
    """
    user_prompt = data.prompt
    if not user_prompt:
        raise HTTPException(status_code=400, detail="Prompt Incorrect")

    # QA is read from module scope on every call, so a chain rebuilt by the
    # training route is picked up automatically.
    res = QA(user_prompt)
    answer = res["result"]
    docs = res["source_documents"]

    sources = [
        (os.path.basename(str(doc.metadata["source"])), str(doc.page_content))
        for doc in docs
    ]
    return {
        "response": {
            "Prompt": user_prompt,
            "Answer": answer,
            "Sources": sources,
        }
    }
@api_app.post("/save_document/")
async def create_upload_file(file: UploadFile):
    """Store an uploaded document in the source-documents directory.

    Args:
        file: The uploaded file. It must be at most 10 MB and have one of the
            accepted content types.

    Returns:
        A dict with the ``"filename"`` under which the file was stored.

    Raises:
        HTTPException: 400 if the file is too large, has an unsupported
            content type, or has no usable file name.
    """
    # Measure the size by seeking to the end, then rewind for the copy below.
    file.file.seek(0, 2)
    file_size = file.file.tell()
    await file.seek(0)

    if file_size > 10 * 1024 * 1024:
        # more than 10 MB
        raise HTTPException(status_code=400, detail="File too large")

    allowed_content_types = (
        "text/plain",
        "text/markdown",
        "text/x-markdown",
        "text/csv",
        "application/msword",
        "application/pdf",
        "application/vnd.ms-excel",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "text/x-python",
        "application/x-python-code",
    )
    if file.content_type not in allowed_content_types:
        raise HTTPException(status_code=400, detail="Invalid file type")

    # The client controls the file name: keep only its final component so an
    # absolute path or "../" sequence cannot write outside the upload
    # directory, and reject names that are empty after stripping.
    safe_name = os.path.basename(file.filename or "")
    if safe_name in ("", ".", ".."):
        raise HTTPException(status_code=400, detail="Invalid filename")

    upload_dir = os.path.join(os.getcwd(), PATH_NAME_SOURCE_DIRECTORY)
    os.makedirs(upload_dir, exist_ok=True)

    dest = os.path.join(upload_dir, safe_name)
    with open(dest, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)

    return {"filename": safe_name}
@api_app.websocket("/ws/{client_id}")
async def websocket_endpoint(websocket: WebSocket, client_id: int):
    """Answer prompts received over a websocket connection.

    Each text message is treated as a prompt. A prompt identical to the
    previous one on the same connection is ignored. The QA chain is a
    synchronous callable, so it runs in the default executor to keep the
    event loop free; its final answer is then sent back as one text message.

    Args:
        websocket: The client connection.
        client_id: Path parameter identifying the client; not used here.
    """
    await websocket.accept()
    loop = asyncio.get_running_loop()
    oldReceiveText = ''
    try:
        while True:
            prompt = await websocket.receive_text()
            if prompt == oldReceiveText:
                continue
            handlerToken.callback = websocket.send_text
            oldReceiveText = prompt
            # QA(prompt) returns a plain dict and cannot be awaited directly;
            # run the blocking call in a worker thread and await that instead.
            result = await loop.run_in_executor(None, QA, prompt)
            # The token handler only prints, so send the completed answer
            # here; otherwise the client would never receive a reply.
            await websocket.send_text(str(result["result"]))
    except WebSocketDisconnect:
        print('disconnect')
    except RuntimeError as error:
        print(error)
|