Yapp99 committed
Commit b37221e
1 Parent(s): 0d3b8dc

Some minor tweaks

Files changed (3)
  1. api.py +3 -2
  2. llm_backend.py +1 -1
  3. schema.py +1 -2
api.py CHANGED
@@ -18,7 +18,7 @@ logger = logging.getLogger("uvicorn.error")
 @app.get("/")
 def index():
     logger.info("this is a debug message")
-    return {"hello": "world"}
+    return {"Hello": "world"}
 
 
 @app.post("/chat_stream")
@@ -68,6 +68,7 @@ def chat(request: ChatRequest):
     }
     try:
         output = chat_with_model(request.chat_history, request.model, kwargs)
-        return HTMLResponse(output, media_type="text/plain")
+        return {"response": output}
+        # return HTMLResponse(output, media_type="text/plain")
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
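With this change the chat() handler returns a JSON body of the form {"response": "<model output>"} instead of a plain-text HTMLResponse, so callers now read a JSON field rather than the raw body. A minimal client sketch follows; it is not part of the commit, and the /chat path, the localhost:8000 address, and the role/content shape of Message are all assumptions not visible in this diff.

import requests

# Assumed request shape: chat_history entries use a role/content layout,
# and the API is served at http://localhost:8000 on the /chat route
# (all assumptions; only the field names from schema.py are shown in the diff).
payload = {
    "chat_history": [{"role": "user", "content": "Hello!"}],
    "model": "llama3.2",
    "temperature": 0.8,
}

resp = requests.post("http://localhost:8000/chat", json=payload)
resp.raise_for_status()

# After this commit the endpoint returns JSON, so the model output is read
# from the "response" key instead of the plain-text body.
print(resp.json()["response"])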
llm_backend.py CHANGED
@@ -66,7 +66,7 @@ def chat_with_model(chat_history, model, kwargs: dict):
     input_kwargs = {**default_kwargs, **kwargs, **forced_kwargs}
     response = llm.__call__(prompt, **input_kwargs)
 
-    return response["choices"][0]["text"]
+    return response["choices"][0]["text"].strip()
 
 
 # %% example input
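The only change here is trimming whitespace from the completion text before returning it, since completions often start or end with stray newlines or padding. A tiny illustration with a faked response dict that mirrors the response["choices"][0]["text"] access above (not actual model output):

# Fake completion dict, for illustration only.
response = {"choices": [{"text": "\n  Sure, here is the answer.\n"}]}

raw = response["choices"][0]["text"]
clean = raw.strip()  # what chat_with_model now returns

print(repr(raw))    # '\n  Sure, here is the answer.\n'
print(repr(clean))  # 'Sure, here is the answer.'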
schema.py CHANGED
@@ -25,7 +25,7 @@ MODEL_ARGS = {
 
 logger = logging.getLogger("uvicorn.error")
 for model_arg in MODEL_ARGS.values():
-    logger.info("this is a debug message")
+    logger.info(f"Checking for {model_arg['repo_id']}")
     hf_hub_download(**model_arg)
 
 
@@ -37,7 +37,6 @@ class Message(BaseModel):
 class ChatRequest(BaseModel):
     chat_history: List[Message]
     model: Literal["llama3.2", "falcon-mamba", "mistral-nemo"] = "llama3.2"
-    stream: bool = False
     max_tokens: Optional[int] = 65536
     temperature: float = 0.8
     top_p: float = 0.95
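With stream dropped, ChatRequest validates against only the remaining fields; under pydantic's default config an unexpected "stream" key in the request body is simply ignored rather than rejected. A small usage sketch of the updated model follows; Message's fields are not shown in this diff, so the role/content shape is an assumption.

from typing import List, Literal, Optional
from pydantic import BaseModel

class Message(BaseModel):  # assumed shape, not defined in this diff
    role: str
    content: str

class ChatRequest(BaseModel):
    chat_history: List[Message]
    model: Literal["llama3.2", "falcon-mamba", "mistral-nemo"] = "llama3.2"
    max_tokens: Optional[int] = 65536
    temperature: float = 0.8
    top_p: float = 0.95

# Only chat_history is required; everything else falls back to its default.
req = ChatRequest(chat_history=[{"role": "user", "content": "Hi"}])
print(req.model)       # "llama3.2" (default)
print(req.max_tokens)  # 65536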