File size: 2,349 Bytes
d4d2732
d551a10
d4d2732
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2e6b80
d4d2732
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d551a10
 
 
 
 
d4d2732
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import JSONResponse, FileResponse
from fastapi.staticfiles import StaticFiles
from huggingface_hub import InferenceClient
import json

app = FastAPI()

# Hosted inference client for the chat model used by /generate/.
client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")

# System prompt sent as the first message of every conversation.
# NOTE: trailing space on the second-to-last fragment is required so the
# concatenated sentences do not run together ("...information.Always...").
SYSTEM_MESSAGE = (
    "You are a helpful, respectful and honest assistant. Always answer as helpfully "
    "as possible, while being safe. Your answers should not include any harmful, "
    "unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure "
    "that your responses are socially unbiased and positive in nature.\n\nIf a question "
    "does not make any sense, or is not factually coherent, explain why instead of "
    "answering something not correct. If you don't know the answer to a question, please "
    "don't share false information. "
    "Always respond in the language of the user prompt."
)
# Generation parameters shared by every request.
MAX_TOKENS = 2000
TEMPERATURE = 0.7
TOP_P = 0.95

def respond(message, history: list[tuple[str, str]]):
    """Yield the assistant's reply to *message* as a stream of text chunks.

    Args:
        message: The new user prompt.
        history: Prior turns as (user_text, assistant_text) pairs; empty
            entries on either side are skipped.

    Yields:
        Non-empty text fragments of the model's streamed completion.
    """
    messages = [{"role": "system", "content": SYSTEM_MESSAGE}]

    for user_text, assistant_text in history:
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})

    messages.append({"role": "user", "content": message})

    stream = client.chat_completion(
        messages,
        max_tokens=MAX_TOKENS,
        stream=True,
        temperature=TEMPERATURE,
        top_p=TOP_P,
    )

    # Use a distinct loop name (the original shadowed the `message` parameter)
    # and skip empty deltas: the final streamed chunk can carry content=None,
    # which would crash consumers that concatenate the yielded parts.
    for chunk in stream:
        content = chunk.choices[0].delta.content
        if content:
            yield content

@app.post("/generate/")
async def generate(request: Request):
    """Handle a form POST with `prompt` and optional JSON `history`;
    return the fully aggregated model reply as JSON.

    Raises:
        HTTPException(400): if `prompt` is missing or `history` is not
            valid JSON.
    """
    form = await request.form()
    prompt = form.get("prompt")

    try:
        history = json.loads(form.get("history", "[]"))  # default: no prior turns
    except json.JSONDecodeError:
        # Malformed client input must be a 400, not an unhandled 500.
        raise HTTPException(status_code=400, detail="history must be valid JSON")

    if not prompt:
        raise HTTPException(status_code=400, detail="Prompt is required")

    # Join once instead of quadratic `+=`; drop None/empty parts so a
    # terminal empty delta from the stream cannot break the concatenation.
    final_response = "".join(part for part in respond(prompt, history) if part)

    return JSONResponse(content={"response": final_response})

@app.get("/")
def index() -> FileResponse:
    """Serve the single-page app entry point explicitly."""
    return FileResponse(path="/app/static/index.html", media_type="text/html")

# Mount static assets LAST: Starlette matches in registration order, so a
# "/" mount registered before the route above would shadow it entirely.
app.mount("/", StaticFiles(directory="static", html=True), name="static")