from huggingface_hub import InferenceClient import gradio as gr client = InferenceClient( "mistralai/Mixtral-8x7B-Instruct-v0.1" ) system_prompt = """Let's say You are Santa Claus, the jolly old elf who knows everything about Christmas. As an LLM, your job is to shortly answer questions about Christmas traditions, gift ideas, and the meaning of Christmas. Use a cheerful and festive tone. Try to keep your responses short.""" def format_prompt(message, history): prompt = "" for user_prompt, bot_response in history: prompt += f"[INST] {user_prompt} [/INST]" prompt += f" {bot_response} " prompt += f"[INST] {message} [/INST]" return prompt def generate( prompt, history, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0, ): temperature = float(temperature) if temperature < 1e-2: temperature = 1e-2 top_p = float(top_p) generate_kwargs = dict( temperature=temperature, max_new_tokens=max_new_tokens, top_p=top_p, repetition_penalty=repetition_penalty, do_sample=True, seed=42, ) formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history) stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False) output = "" for response in stream: output += response.token.text yield output return output additional_inputs=[ gr.Textbox( label="System Prompt", max_lines=1, interactive=True, ), gr.Slider( label="Temperature", value=0.9, minimum=0.0, maximum=1.0, step=0.05, interactive=True, info="Higher values produce more diverse outputs", ), gr.Slider( label="Max new tokens", value=256, minimum=0, maximum=1048, step=64, interactive=True, info="The maximum numbers of new tokens", ), gr.Slider( label="Top-p (nucleus sampling)", value=0.90, minimum=0.0, maximum=1, step=0.05, interactive=True, info="Higher values sample more low-probability tokens", ), gr.Slider( label="Repetition penalty", value=1.2, minimum=1.0, maximum=2.0, step=0.05, interactive=True, info="Penalize repeated tokens", ) ] examples=[["How old are you?", None, None, None, None, None, ], ["Who is your favorite reindeer?", None, None, None, None, None,], ["What is your secret to pass through chimneys?", None, None, None, None, None,], ["What is your real name?", None, None, None, None, None,], ["Did you receive my wish list?", None, None, None, None, None,], ] css=""" .gradio-container{ max-width: 720px!important; } img#santa-portrait { margin: 20px auto; border-radius: 10px; } """ santa_portrait = """ https://cdn-lfs-us-1.huggingface.co/repos/d7/f7/d7f7d979f9cc4900d4c2cfb12b580241727ac1977845850590d9ae820c297614/f8ec6cadd640be5c45e017703b276faad4786f1180ef7b0609a646a24cd224fd?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27santa_avatar.png%3B+filename%3D%22santa_avatar.png%22%3B&response-content-type=image%2Fpng&Expires=1703512203&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcwMzUxMjIwM319LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2Q3L2Y3L2Q3ZjdkOTc5ZjljYzQ5MDBkNGMyY2ZiMTJiNTgwMjQxNzI3YWMxOTc3ODQ1ODUwNTkwZDlhZTgyMGMyOTc2MTQvZjhlYzZjYWRkNjQwYmU1YzQ1ZTAxNzcwM2IyNzZmYWFkNDc4NmYxMTgwZWY3YjA2MDlhNjQ2YTI0Y2QyMjRmZD9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSomcmVzcG9uc2UtY29udGVudC10eXBlPSoifV19&Signature=M1QyL2Gwb12b0RxZesFOMOpgLzYoL3fVBG2mutAL0ERoqdfRMeec9mlytfxvMSGj88TIZEv1X%7E5Eu%7ERkXh0HovLO3kYR7Q%7Eclfc9P7dVD6FN7g1yDoCU1D6p5SsHsyjsGLm7tyVMb9TWD71DSCV0pIOGDKroxkwGZHwQG1M93gqCYdNWcxOCijiWN9AxsFoG5JaE9092j5SfI9fiQS4tBAtOTFlep2TXPBtJ9rXpGbNpcD2VCKi4wneGAxjJ6gbh77TfB3e79TjpcUir5QpRbFgj4NRnogBylOei4SNHqLI3eB-3ua7BRpkWCwNpNd7jXMoomwokcVW%7Exm63%7E2YjDw__&Key-Pair-Id=KCD77M1F0VK2B """ gr.ChatInterface( fn=generate, chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, bubble_full_width=False, avatar_images=[None, "santa_avatar.png"]), #additional_inputs=additional_inputs, title=f" Chat with Santa", examples=examples, concurrency_limit=20, css=css ).launch(show_api=False)