Spaces:

olivierdehaene
/

chat-llm-streaming

Running

App Files Files Community

OlivierDehaene commited on Mar 13, 2023

Commit

f15edb7

•

1 Parent(s): 173e7dd

openchat support

Browse files

Files changed (1) hide show

app.py +76 -28

app.py CHANGED Viewed

@@ -4,6 +4,13 @@ import gradio as gr
 from text_generation import Client, InferenceAPIClient
 def get_client(model: str):
     if model == "Rallio67/joi2_20B_instruct_alpha":
@@ -14,26 +21,30 @@ def get_client(model: str):
 def get_usernames(model: str):
     if model == "Rallio67/joi2_20B_instruct_alpha":
-        return "User: ", "Joi: "
     if model == "togethercomputer/GPT-NeoXT-Chat-Base-20B":
-        return "<human>: ", "<bot>: "
-    return "User: ", "Assistant: "
 def predict(
-        model: str,
-        inputs: str,
-        top_p: float,
-        temperature: float,
-        top_k: int,
-        repetition_penalty: float,
-        watermark: bool,
-        chatbot,
-        history,
 ):
     client = get_client(model)
-    user_name, assistant_name = get_usernames(model)
     history.append(inputs)
@@ -43,19 +54,20 @@ def predict(
         if not user_data.startswith(user_name):
             user_data = user_name + user_data
-        if not model_data.startswith("\n\n" + assistant_name):
-            model_data = "\n\n" + assistant_name + model_data
-        past.append(user_data + model_data + "\n\n")
     if not inputs.startswith(user_name):
         inputs = user_name + inputs
-    total_inputs = "".join(past) + inputs + "\n\n" + assistant_name
     partial_words = ""
-    for i, response in enumerate(client.generate_stream(
             total_inputs,
             top_p=top_p if top_p < 1.0 else None,
             top_k=top_k,
@@ -65,7 +77,8 @@ def predict(
             temperature=temperature,
             max_new_tokens=500,
             stop_sequences=[user_name.rstrip(), assistant_name.rstrip()],
-    )):
         if response.token.special:
             continue
@@ -81,7 +94,8 @@ def predict(
             history[-1] = partial_words
         chat = [
-            (history[i].strip(), history[i + 1].strip()) for i in range(0, len(history) - 1, 2)
         ]
         yield chat, history
@@ -90,6 +104,26 @@ def reset_textbox():
     return gr.update(value="")
 title = """<h1 align="center">🔥Large Language Model API 🚀Streaming🚀</h1>"""
 description = """Language models can be conditioned to act like dialogue agents through a conversational prompt that typically takes the form:
@@ -104,17 +138,21 @@ Assistant: <utterance>
 In this app, you can explore the outputs of multiple LLMs when prompted in this way.
 """
 with gr.Blocks(
-        css="""#col_container {margin-left: auto; margin-right: auto;}
                 #chatbot {height: 520px; overflow: auto;}"""
 ) as demo:
     gr.HTML(title)
     with gr.Column(elem_id="col_container"):
         model = gr.Radio(
-            value="Rallio67/joi2_20B_instruct_alpha",
             choices=[
                 "Rallio67/joi2_20B_instruct_alpha",
-                # "togethercomputer/GPT-NeoXT-Chat-Base-20B",
                 "google/flan-t5-xxl",
                 "google/flan-ul2",
                 "bigscience/bloom",
@@ -124,10 +162,12 @@ with gr.Blocks(
             label="Model",
             interactive=True,
         )
         chatbot = gr.Chatbot(elem_id="chatbot")
         inputs = gr.Textbox(
             placeholder="Hi there!", label="Type an input and press Enter"
         )
         state = gr.State([])
         b1 = gr.Button()
@@ -135,7 +175,7 @@ with gr.Blocks(
             top_p = gr.Slider(
                 minimum=-0,
                 maximum=1.0,
-                value=0.95,
                 step=0.05,
                 interactive=True,
                 label="Top-p (nucleus sampling)",
@@ -143,7 +183,7 @@ with gr.Blocks(
             temperature = gr.Slider(
                 minimum=-0,
                 maximum=5.0,
-                value=0.5,
                 step=0.1,
                 interactive=True,
                 label="Temperature",
@@ -151,7 +191,7 @@ with gr.Blocks(
             top_k = gr.Slider(
                 minimum=1,
                 maximum=50,
-                value=4,
                 step=1,
                 interactive=True,
                 label="Top-k",
@@ -159,12 +199,20 @@ with gr.Blocks(
             repetition_penalty = gr.Slider(
                 minimum=0.1,
                 maximum=3.0,
-                value=1.03,
                 step=0.01,
                 interactive=True,
                 label="Repetition Penalty",
             )
-            watermark = gr.Checkbox(value=True, label="Text watermarking")
     inputs.submit(
         predict,

 from text_generation import Client, InferenceAPIClient
+openchat_preprompt = (
+    "\n<human>: Hi!\n<bot>: My name is Bot, model version is 0.15, part of an open-source kit for "
+    "fine-tuning new bots! I was created by Together, LAION, and Ontocord.ai and the open-source "
+    "community. I am not human, not evil and not alive, and thus have no thoughts and feelings, "
+    "but I am programmed to be helpful, polite, honest, and friendly.\n"
+)
 def get_client(model: str):
     if model == "Rallio67/joi2_20B_instruct_alpha":
 def get_usernames(model: str):
+    """
+    Returns:
+        (str, str, str, str): pre-prompt, username, bot name, separator
+    """
     if model == "Rallio67/joi2_20B_instruct_alpha":
+        return "", "User: ", "Joi: ", "\n\n"
     if model == "togethercomputer/GPT-NeoXT-Chat-Base-20B":
+        return openchat_preprompt, "<human>: ", "<bot>: ", "\n"
+    return "", "User: ", "Assistant: ", "\n"
 def predict(
+    model: str,
+    inputs: str,
+    top_p: float,
+    temperature: float,
+    top_k: int,
+    repetition_penalty: float,
+    watermark: bool,
+    chatbot,
+    history,
 ):
     client = get_client(model)
+    preprompt, user_name, assistant_name, sep = get_usernames(model)
     history.append(inputs)
         if not user_data.startswith(user_name):
             user_data = user_name + user_data
+        if not model_data.startswith(sep + assistant_name):
+            model_data = sep + assistant_name + model_data
+        past.append(user_data + model_data.rstrip() + sep)
     if not inputs.startswith(user_name):
         inputs = user_name + inputs
+    total_inputs = preprompt + "".join(past) + inputs + sep + assistant_name.rstrip()
     partial_words = ""
+    for i, response in enumerate(
+        client.generate_stream(
             total_inputs,
             top_p=top_p if top_p < 1.0 else None,
             top_k=top_k,
             temperature=temperature,
             max_new_tokens=500,
             stop_sequences=[user_name.rstrip(), assistant_name.rstrip()],
+        )
+    ):
         if response.token.special:
             continue
             history[-1] = partial_words
         chat = [
+            (history[i].strip(), history[i + 1].strip())
+            for i in range(0, len(history) - 1, 2)
         ]
         yield chat, history
     return gr.update(value="")
+def radio_on_change(
+    value: str, disclaimer, top_p, top_k, temperature, repetition_penalty, watermark
+):
+    if value == "togethercomputer/GPT-NeoXT-Chat-Base-20B":
+        top_p = top_p.update(value=0.25)
+        top_k = top_k.update(value=50)
+        temperature = temperature.update(value=0.6)
+        repetition_penalty = repetition_penalty.update(value=1.01)
+        watermark = watermark.update(False)
+        disclaimer = disclaimer.update(visible=True)
+    else:
+        top_p = top_p.update(value=0.95)
+        top_k = top_k.update(value=4)
+        temperature = temperature.update(value=0.5)
+        repetition_penalty = repetition_penalty.update(value=1.03)
+        watermark = watermark.update(True)
+        disclaimer = disclaimer.update(visible=False)
+    return disclaimer, top_p, top_k, temperature, repetition_penalty, watermark
 title = """<h1 align="center">🔥Large Language Model API 🚀Streaming🚀</h1>"""
 description = """Language models can be conditioned to act like dialogue agents through a conversational prompt that typically takes the form:
 In this app, you can explore the outputs of multiple LLMs when prompted in this way.
 """
+openchat_disclaimer = """
+<div align="center">Checkout the official <a href=https://huggingface.co/spaces/togethercomputer/OpenChatKit>OpenChatKit feedback app</a> for the full experience.</div>
+"""
 with gr.Blocks(
+    css="""#col_container {margin-left: auto; margin-right: auto;}
                 #chatbot {height: 520px; overflow: auto;}"""
 ) as demo:
     gr.HTML(title)
     with gr.Column(elem_id="col_container"):
         model = gr.Radio(
+            value="togethercomputer/GPT-NeoXT-Chat-Base-20B",
             choices=[
+                "togethercomputer/GPT-NeoXT-Chat-Base-20B",
                 "Rallio67/joi2_20B_instruct_alpha",
                 "google/flan-t5-xxl",
                 "google/flan-ul2",
                 "bigscience/bloom",
             label="Model",
             interactive=True,
         )
         chatbot = gr.Chatbot(elem_id="chatbot")
         inputs = gr.Textbox(
             placeholder="Hi there!", label="Type an input and press Enter"
         )
+        disclaimer = gr.Markdown(openchat_disclaimer)
         state = gr.State([])
         b1 = gr.Button()
             top_p = gr.Slider(
                 minimum=-0,
                 maximum=1.0,
+                value=0.25,
                 step=0.05,
                 interactive=True,
                 label="Top-p (nucleus sampling)",
             temperature = gr.Slider(
                 minimum=-0,
                 maximum=5.0,
+                value=0.6,
                 step=0.1,
                 interactive=True,
                 label="Temperature",
             top_k = gr.Slider(
                 minimum=1,
                 maximum=50,
+                value=50,
                 step=1,
                 interactive=True,
                 label="Top-k",
             repetition_penalty = gr.Slider(
                 minimum=0.1,
                 maximum=3.0,
+                value=1.01,
                 step=0.01,
                 interactive=True,
                 label="Repetition Penalty",
             )
+            watermark = gr.Checkbox(value=False, label="Text watermarking")
+    model.change(
+        lambda value: radio_on_change(
+            value, disclaimer, top_p, top_k, temperature, repetition_penalty, watermark
+        ),
+        inputs=model,
+        outputs=[disclaimer, top_p, top_k, temperature, repetition_penalty, watermark],
+    )
     inputs.submit(
         predict,