from huggingface_hub import InferenceClient
import gradio as gr
import os

# Client for the Hugging Face Inference API, pointed at the hosted Mixtral model
client = InferenceClient(
    "mistralai/Mixtral-8x7B-Instruct-v0.1"
)


# Secret system prompt, read from the environment; default to an empty string
# so the string concatenation in format_prompt does not fail when it is unset
secret_prompt = os.getenv("SECRET_PROMPT", "")

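# Build a Mixtral-instruct style prompt: the hidden secret prompt first, then
# each past (user, bot) turn wrapped in [INST] ... [/INST] with an </s> end tag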
def format_prompt(new_message, history):
    prompt = secret_prompt
    for user_msg, bot_msg in history:
        prompt += f"[INST] {user_msg} [/INST]"
        prompt += f" {bot_msg}</s> "
    prompt += f"[INST] {new_message} [/INST]"
    return prompt


def generate(
    prompt,
    history,
    system_prompt,
    temperature=0.9,
    max_new_tokens=256,
    top_p=0.95,
    repetition_penalty=1.0,
):
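    # Text-generation backends reject a temperature of exactly 0 when
    # do_sample=True, so clamp to a small positive floor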
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)
    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )
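    # Prepend the user-visible system prompt to the new message; the hidden
    # SECRET_PROMPT is prepended separately inside format_prompt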
    formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
    stream = client.text_generation(
        formatted_prompt,
        **generate_kwargs,
        stream=True,
        details=True,
        return_full_text=False,
    )
    output = ""
    for response in stream:
        output += response.token.text
        yield output


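# Extra controls rendered under the chat box; gr.ChatInterface passes their
# values to generate() positionally, after the message and history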
additional_inputs = [
    gr.Textbox(
        label="System Prompt",
        max_lines=1,
        interactive=True,
    ),
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        label="Max new tokens",
        value=256,
        minimum=0,
        maximum=1024,
        step=64,
        interactive=True,
        info="The maximum number of new tokens",
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.90,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens",
    )
]
examples = []
gr.ChatInterface(
    fn=generate,
    chatbot=gr.Chatbot(
        show_label=False,
        show_share_button=False,
        show_copy_button=True,
        likeable=True,
        layout="panel",
    ),
    additional_inputs=additional_inputs,
    title="Mixtral 46.7B",
    examples=examples,
    concurrency_limit=20,
).launch(show_api=False)
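
# To run locally, the secret prompt must be provided via the environment
# (hypothetical example value; in a Hugging Face Space it would instead be
# configured as a repository secret):
#
#   export SECRET_PROMPT="You are a helpful assistant."
#   python app.py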