Spaces:
Sleeping
Sleeping
File size: 2,326 Bytes
0d3b8dc f8813f2 efea2bf f8813f2 0d3b8dc f8813f2 efea2bf f8813f2 efea2bf f8813f2 0d3b8dc f8813f2 0d3b8dc f8813f2 0d3b8dc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
from fastapi.responses import StreamingResponse, HTMLResponse
from fastapi import FastAPI, HTTPException
import logging
from llm_backend import chat_with_model, stream_with_model
from schema import ChatRequest
# Commands for starting the development server on port 5723. This is a bare
# string expression placed after the imports, so it has no runtime effect.
"""
uvicorn api:app --reload --port 5723
fastapi dev api.py --port 5723
"""
# FastAPI application instance; the route decorators below register on it.
app = FastAPI()
# Logger registered under the name "uvicorn.error".
# NOTE(review): presumably chosen so messages reuse uvicorn's handlers — confirm.
logger = logging.getLogger("uvicorn.error")
# Root endpoint: emits one INFO-level log line and returns a fixed greeting
# dict. Documented with comments rather than a docstring because FastAPI
# copies handler docstrings into the generated OpenAPI description.
@app.get("/")
def index():
    greeting = {"hello": "world"}
    logger.info("this is a debug message")
    return greeting
def _sampling_kwargs(request: ChatRequest, *, stream: bool) -> dict:
    """Collect the generation settings carried by *request* into one dict.

    Both chat endpoints pass the same set of keys to the backend and differ
    only in the ``"stream"`` flag, so the mapping is built in a single place.

    Args:
        request: Parsed request body supplying the sampling fields.
        stream: Value stored under the ``"stream"`` key.

    Returns:
        A new dict of keyword settings for the backend call.
    """
    return {
        "max_tokens": request.max_tokens,
        "temperature": request.temperature,
        "stream": stream,
        "top_p": request.top_p,
        "min_p": request.min_p,
        "typical_p": request.typical_p,
        "frequency_penalty": request.frequency_penalty,
        "presence_penalty": request.presence_penalty,
        "repeat_penalty": request.repeat_penalty,
        "top_k": request.top_k,
        "seed": request.seed,
        "tfs_z": request.tfs_z,
        "mirostat_mode": request.mirostat_mode,
        "mirostat_tau": request.mirostat_tau,
        "mirostat_eta": request.mirostat_eta,
    }


@app.post("/chat_stream")
def chat_stream(request: ChatRequest):
    """Generate a reply for the supplied chat history and stream it as plain text.

    Responds with HTTP 500, carrying the error message as the detail, when
    the backend call raises.
    """
    kwargs = _sampling_kwargs(request, stream=True)
    try:
        token_generator = stream_with_model(request.chat_history, request.model, kwargs)
        # NOTE(review): if stream_with_model returns a lazy generator, errors
        # raised while it is being iterated surface after the response has
        # started and are not seen by this handler — confirm.
        return StreamingResponse(token_generator, media_type="text/plain")
    except Exception as e:
        # Keep the traceback in the server log; the client only gets str(e).
        logger.exception("chat_stream request failed")
        raise HTTPException(status_code=500, detail=str(e)) from e


@app.post("/chat")
def chat(request: ChatRequest):
    """Generate a reply for the supplied chat history and return it as plain text.

    Responds with HTTP 500, carrying the error message as the detail, when
    the backend call raises.
    """
    kwargs = _sampling_kwargs(request, stream=False)
    try:
        output = chat_with_model(request.chat_history, request.model, kwargs)
        # NOTE(review): HTMLResponse is used with an explicit text/plain media
        # type; PlainTextResponse would state the intent directly — confirm
        # before swapping, since it needs an extra import.
        return HTMLResponse(output, media_type="text/plain")
    except Exception as e:
        # Keep the traceback in the server log; the client only gets str(e).
        logger.exception("chat request failed")
        raise HTTPException(status_code=500, detail=str(e)) from e
|