File size: 2,326 Bytes
0d3b8dc
f8813f2
efea2bf
f8813f2
0d3b8dc
f8813f2
 
 
efea2bf
f8813f2
 
 
 
 
efea2bf
 
 
 
 
 
 
 
f8813f2
 
 
 
 
 
0d3b8dc
f8813f2
 
 
 
 
 
 
 
 
 
 
 
 
 
0d3b8dc
f8813f2
 
 
0d3b8dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import logging

from fastapi import FastAPI, HTTPException
from fastapi.responses import HTMLResponse, PlainTextResponse, StreamingResponse

from llm_backend import chat_with_model, stream_with_model
from schema import ChatRequest

"""
uvicorn api:app --reload --port 5723
fastapi dev api.py --port 5723
"""

# ASGI application instance; served with the uvicorn / fastapi-dev commands
# shown in the module docstring above.
app = FastAPI()

# Attach to uvicorn's own error logger so messages from this module appear in
# the server's log stream with uvicorn's formatting, instead of going through
# an unconfigured root logger.
logger = logging.getLogger("uvicorn.error")


@app.get("/")
def index():
    """Root endpoint: emits a sample log line and returns a fixed greeting.

    Doubles as a quick liveness check for the running server.
    """
    logger.info("this is a debug message")
    return dict(hello="world")


def _sampling_kwargs(request: ChatRequest, *, stream: bool) -> dict:
    """Collect the sampling parameters shared by both chat endpoints.

    Previously this dict was duplicated verbatim in /chat_stream and /chat
    with only the "stream" flag differing.

    Args:
        request: Incoming chat request carrying the sampling settings.
        stream: Whether the backend should stream tokens incrementally.

    Returns:
        Keyword arguments forwarded verbatim to the model backend.
    """
    return {
        "max_tokens": request.max_tokens,
        "temperature": request.temperature,
        "stream": stream,
        "top_p": request.top_p,
        "min_p": request.min_p,
        "typical_p": request.typical_p,
        "frequency_penalty": request.frequency_penalty,
        "presence_penalty": request.presence_penalty,
        "repeat_penalty": request.repeat_penalty,
        "top_k": request.top_k,
        "seed": request.seed,
        "tfs_z": request.tfs_z,
        "mirostat_mode": request.mirostat_mode,
        "mirostat_tau": request.mirostat_tau,
        "mirostat_eta": request.mirostat_eta,
    }


@app.post("/chat_stream")
def chat_stream(request: ChatRequest):
    """Stream a chat completion token-by-token as text/plain.

    Raises:
        HTTPException: 500 if the backend fails while creating the stream.
            NOTE(review): errors raised lazily *during* generator iteration
            occur after response headers are sent and are NOT converted to a
            500 here — confirm the backend surfaces setup errors eagerly.
    """
    kwargs = _sampling_kwargs(request, stream=True)
    try:
        token_generator = stream_with_model(request.chat_history, request.model, kwargs)
    except Exception as e:
        # Chain the cause so the original traceback is preserved in logs.
        raise HTTPException(status_code=500, detail=str(e)) from e
    return StreamingResponse(token_generator, media_type="text/plain")


@app.post("/chat")
def chat(request: ChatRequest):
    """Return a complete (non-streaming) chat completion as text/plain.

    Raises:
        HTTPException: 500 if the model backend fails.
    """
    kwargs = _sampling_kwargs(request, stream=False)
    try:
        output = chat_with_model(request.chat_history, request.model, kwargs)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    # PlainTextResponse is the idiomatic text/plain response; the original used
    # HTMLResponse with an overridden media_type, which serializes identically.
    return PlainTextResponse(output)