# NOTE(review): the lines below are Hugging Face Spaces page-scrape residue
# (status banner, file size, commit hashes, line-number gutter). They are not
# Python; commented out so the module parses.
# Spaces:
# Runtime error
# Runtime error
# File size: 3,493 Bytes
# 4b137c2 0d0810c 73c11b0 4b137c2 e715043 c6d08b7 e715043 433f502 c6d08b7 4b137c2 0d0810c 4b137c2 73c11b0 4b137c2 0d0810c 4b137c2 9380cde 4b137c2 e715043 9380cde e715043 |
# 1 2 3 4 5 6 7 8 9 10 ... 130 |
from huggingface_hub import InferenceClient
import gradio as gr

# Serverless Inference API client pinned to the Mistral-7B instruct model;
# used by `generate` below for streaming text generation.
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
# <img src="/file=val_speaking_transparent.gif" style="width: 80%; max-width: 550px; height: auto; opacity: 0.55; ">
# HTML shown in the empty chatbot before the first message.
# Fixes: removed a stray "." that sat outside </div> (it rendered as a
# visible period under the banner) and the typo "working at here".
PLACEHOLDER = """
<div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
<h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">Hi Jennifer, welcome to DTF</h1>
<p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Ask me anything about working here...</p>
</div>
"""
def format_prompt(message, history):
    """Render chat history + the new message in Mistral-instruct format.

    Each past (user, assistant) turn becomes "[INST] user [/INST] reply</s> ";
    the new message is appended as an open "[INST] ... [/INST]" for the model
    to complete. The whole prompt starts with the BOS token "<s>".
    """
    past_turns = "".join(
        f"[INST] {user_turn} [/INST] {bot_turn}</s> "
        for user_turn, bot_turn in history
    )
    return f"<s>{past_turns}[INST] {message} [/INST]"
def generate(
    prompt,
    history,
    temperature=0.9,
    max_new_tokens=256,
    top_p=0.95,
    repetition_penalty=1.0,
):
    """Stream a model reply for *prompt*, yielding the accumulated text.

    Yields the partial response after every received token so Gradio can
    render it incrementally; the full text is also the generator's return
    value. Sampling is seeded (seed=42) for reproducible output.
    """
    # The inference endpoint rejects a temperature of exactly 0 when
    # do_sample=True, so clamp to a small positive floor.
    temperature = max(float(temperature), 1e-2)
    top_p = float(top_p)

    token_stream = client.text_generation(
        format_prompt(prompt, history),
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
        stream=True,
        details=True,
        return_full_text=False,
    )

    output = ""
    for chunk in token_stream:
        output += chunk.token.text
        yield output
    return output
# additional_inputs = [
# gr.Slider(
# label="Temperature",
# value=0.9,
# minimum=0.0,
# maximum=1.0,
# step=0.05,
# interactive=True,
# info="Higher values produce more diverse outputs",
# ),
# gr.Slider(
# label="Max new tokens",
# value=256,
# minimum=0,
# maximum=1048,
# step=64,
# interactive=True,
# info="The maximum numbers of new tokens",
# ),
# gr.Slider(
# label="Top-p (nucleus sampling)",
# value=0.90,
# minimum=0.0,
# maximum=1,
# step=0.05,
# interactive=True,
# info="Higher values sample more low-probability tokens",
# ),
# gr.Slider(
# label="Repetition penalty",
# value=1.2,
# minimum=1.0,
# maximum=2.0,
# step=0.05,
# interactive=True,
# info="Penalize repeated tokens",
# ),
# ]
# Gradio UI: mascot image, greeting, and a streaming chat interface backed
# by the `generate` generator above.
with gr.Blocks(fill_height=True) as demo:
    # Mascot animation served from the Space's static file store.
    gr.Image("/file=val_speaking_transparent.gif")
    gr.Markdown("Hi I'm Val the Voyager, welcome onboard!")
    gr.ChatInterface(
        fn=generate,
        chatbot=gr.Chatbot(
            show_label=False,
            show_share_button=False,
            show_copy_button=True,
            likeable=True,
            layout="panel",
            placeholder=PLACEHOLDER,  # HTML shown before the first message
        ),
        # additional_inputs=additional_inputs,
        examples=[
            ["Ask me what an acronym stands for"],
            ["How can I check my leave allowance?"],
            ["Where can I find a floor map of 1 Macarthur?"],
            ["How can I find out about DTF's Disability network?"],
        ],
        cache_examples=False,  # examples hit the live model; don't pre-run them
        title="""Voyager Val""",
    )
if __name__ == "__main__":
    demo.launch()
# | (trailing page-scrape gutter artifact; commented out so the module parses)