Spaces:

edersonmelo
/

deployllm

Sleeping

edersonmelo committed on Jun 19, 2024

Commit

eb8ac8a

verified ·

1 Parent(s): 18f4287

Update main.py

Files changed (1) hide show

main.py CHANGED Viewed

@@ -1,13 +1,25 @@
 from fastapi import FastAPI
 import requests
 from llama_cpp import Llama
 app = FastAPI()
-llm = Llama(model_path="./tinyllama-1.1b-chat.gguf")
-@app.post("/llm")
 async def stream(item: dict):
     if 'prompt' not in item.keys():
         raise ValueError("prompt é obrigatório")
@@ -16,4 +28,6 @@ async def stream(item: dict):
     temperatura = item['temperatura'] if 'temperatura' in item.keys() else 0.2
     max_tokens = item['max_tokens'] if 'max_tokens' in item.keys() else 512
-    return llm(prompt, max_tokens=max_tokens, temperature=temperatura)

 from fastapi import FastAPI
 import requests
 from llama_cpp import Llama
+import threading
 app = FastAPI()
+llm = None  # Module-level model handle; stays None until start_llm() assigns the loaded Llama instance
+def start_llm():  # Load the GGUF model from disk and store it in the module-level `llm`
+    global llm  # Required so the assignment below rebinds the module-level variable, not a local
+    llm = Llama(model_path="./tinyllama-1.1b-chat.gguf")  # Path is relative to the process working directory
+@app.post("/health")  # Registered for POST requests on /health
+def health_check():  # Health endpoint: takes no input
+    return {"status": "ok"}  # Constant payload; does not look at whether `llm` has been loaded
+@app.post("/deployllm")  # Registered for POST requests on /deployllm
 async def stream(item: dict):  # Validate the request dict, then call the model and return its result
+    if llm is None:  # `llm` is still the initial None, i.e. start_llm() has not assigned a model
+        raise ValueError("modelo carregando, por favor tente mais tarde")  # NOTE(review): nothing visible here maps ValueError to an HTTP status - confirm how the client sees this
     if 'prompt' not in item.keys():  # 'prompt' is the only key that is required
         raise ValueError("prompt é obrigatório")
     temperatura = item['temperatura'] if 'temperatura' in item.keys() else 0.2  # Optional key; falls back to 0.2
     max_tokens = item['max_tokens'] if 'max_tokens' in item.keys() else 512  # Optional key; falls back to 512
+    return llm(prompt, max_tokens=max_tokens, temperature=temperatura)  # NOTE(review): `prompt` is not assigned in the lines visible in this diff - presumably set in the collapsed part of the hunk; confirm
+threading.Thread(target=start_llm).start()  # Module-level statement: runs start_llm on a separate thread as soon as this module is imported