ysharma HF staff committed on
Commit
a58bd0b
1 Parent(s): 890f63d

update glm stream
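
This commit removes the dormant OpenChatKit (predict_together) path and switches the ChatGLM tab from the blocking predict_glm handler to a new generator, predict_glm_stream, so partial responses stream into the chatbot as they are produced. The streaming pattern the new handler relies on, as a minimal self-contained sketch (it assumes the THUDM/chatglm-6b checkpoint and invents the demo/msg/stream_reply names for illustration; the sampling values are placeholders, not part of this commit):

import gradio as gr
from transformers import AutoModel, AutoTokenizer

# Assumed to mirror app.py's setup: ChatGLM-6B loaded through transformers.
tokenizer_glm = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
model_glm = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()

def stream_reply(user_input, history=[]):
    # stream_chat yields (partial_response, updated_history) pairs as tokens arrive
    for response, history in model_glm.stream_chat(tokenizer_glm, user_input, history,
                                                   top_p=1.0, temperature=1.0):
        yield history, history  # first output feeds the Chatbot, second the State

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    state = gr.State([])
    msg = gr.Textbox()
    # A generator handler makes the event streaming: every yield repaints the outputs
    msg.submit(stream_reply, [msg, state], [chatbot, state])

demo.queue().launch()  # queuing must be enabled for generator (streaming) handlers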

Files changed (1)
  1. app.py +26 -80
app.py CHANGED
@@ -93,75 +93,6 @@ def predict_chatgpt(inputs, top_p_chatgpt, temperature_chatgpt, openai_api_key,
          yield chat, history, chat_counter_chatgpt # this resembles {chatbot: chat, state: history}


- #Predict function for OPENCHATKIT
- def predict_together(model: str,
-                      inputs: str,
-                      top_p: float,
-                      temperature: float,
-                      top_k: int,
-                      repetition_penalty: float,
-                      watermark: bool,
-                      chatbot,
-                      history,):
-
-     client = Client(os.getenv("API_URL_TGTHR")) #get_client(model)
-     # debug
-     #print(f"^^client is - {client}")
-     user_name, assistant_name = "<human>: ", "<bot>: "
-     preprompt = openchat_preprompt
-     sep = '\n'
-
-     history.append(inputs)
-
-     past = []
-     for data in chatbot:
-         user_data, model_data = data
-
-         if not user_data.startswith(user_name):
-             user_data = user_name + user_data
-         if not model_data.startswith("\n" + assistant_name):
-             model_data = "\n" + assistant_name + model_data
-
-         past.append(user_data + model_data.rstrip() + "\n")
-
-     if not inputs.startswith(user_name):
-         inputs = user_name + inputs
-
-     total_inputs = preprompt + "".join(past) + inputs + "\n" + assistant_name.rstrip()
-     # truncate total_inputs
-     #total_inputs = total_inputs[-1000:]
-
-     partial_words = ""
-
-     for i, response in enumerate(client.generate_stream(
-         total_inputs,
-         top_p=top_p,
-         top_k=top_k,
-         repetition_penalty=repetition_penalty,
-         watermark=watermark,
-         temperature=temperature,
-         max_new_tokens=500,
-         stop_sequences=[user_name.rstrip(), assistant_name.rstrip()],
-     )):
-         if response.token.special:
-             continue
-
-         partial_words = partial_words + response.token.text
-         if partial_words.endswith(user_name.rstrip()):
-             partial_words = partial_words.rstrip(user_name.rstrip())
-         if partial_words.endswith(assistant_name.rstrip()):
-             partial_words = partial_words.rstrip(assistant_name.rstrip())
-
-         if i == 0:
-             history.append(" " + partial_words)
-         else:
-             history[-1] = partial_words
-
-         chat = [
-             (history[i].strip(), history[i + 1].strip()) for i in range(0, len(history) - 1, 2)
-         ]
-         yield chat, history
-
  # Define function to generate model predictions and update the history
  def predict_glm(input, history=[]):
      response, history = model_glm.chat(tokenizer_glm, input, history)
@@ -177,6 +108,21 @@ def translate_Chinese_English(chinese_text):
      trans_eng_text = tokenizer_chtoen.batch_decode(generated_tokens, skip_special_tokens=True)
      return trans_eng_text[0]

+ # Define function to generate model predictions and update the history
+ def predict_glm_stream(input, history=[]): #, top_p, temperature):
+     response, history = model_glm.chat(tokenizer_glm, input, history)
+     print(f"outside for loop response is ^^- {response}")
+     print(f"outside for loop history is ^^- {history}")
+     top_p, temperature = 1.0, 1.0
+     for response, history in model_glm.stream_chat(tokenizer_glm, input, history, top_p=top_p, temperature=temperature): #max_length=max_length,
+         print(f"In for loop response is ^^- {response}")
+         print(f"In for loop history is ^^- {history}")
+         # translate Chinese to English
+         history = [(query, translate_Chinese_English(response)) for query, response in history]
+         print(f"In for loop translated history is ^^- {history}")
+         yield history, history #[history] + updates
+
+
  """
  def predict(input, max_length, top_p, temperature, history=None):
      if history is None:
@@ -185,7 +131,7 @@ def predict(input, max_length, top_p, temperature, history=None):
                        temperature=temperature):
          updates = []
          for query, response in history:
-             updates.append(gr.update(visible=True, value="用户:" + query))
+             updates.append(gr.update(visible=True, value="user:" + query)) #用户
          updates.append(gr.update(visible=True, value="ChatGLM-6B:" + response))
          if len(updates) < MAX_BOXES:
              updates = updates + [gr.Textbox.update(visible=False)] * (MAX_BOXES - len(updates))
@@ -265,21 +211,21 @@ with gr.Blocks(css="""#col_container {width: 1000px; margin-left: auto; margin-r
      inputs.submit( predict_chatgpt,
                    [inputs, top_p_chatgpt, temperature_chatgpt, openai_api_key, chat_counter_chatgpt, chatbot_chatgpt, state_chatgpt],
                    [chatbot_chatgpt, state_chatgpt, chat_counter_chatgpt],)
-     #inputs.submit( predict_together,
-     #              [temp_textbox_together, inputs, top_p, temperature, top_k, repetition_penalty, watermark, chatbot_together, state_together, ],
-     #              [chatbot_together, state_together],)
-     inputs.submit( predict_glm,
+     #inputs.submit( predict_glm,
+     #               [inputs, state_glm, ],
+     #               [chatbot_glm, state_glm],)
+     #b1.click( predict_glm,
+     #          [inputs, state_glm, ],
+     #          [chatbot_glm, state_glm],)
+     inputs.submit( predict_glm_stream,
+                   [inputs, state_glm, ],
+                   [chatbot_glm, state_glm],)
+     b1.click( predict_glm_stream,
                    [inputs, state_glm, ],
                    [chatbot_glm, state_glm],)
      b1.click( predict_chatgpt,
               [inputs, top_p_chatgpt, temperature_chatgpt, openai_api_key, chat_counter_chatgpt, chatbot_chatgpt, state_chatgpt],
               [chatbot_chatgpt, state_chatgpt, chat_counter_chatgpt],)
-     #b1.click( predict_together,
-     #         [temp_textbox_together, inputs, top_p, temperature, top_k, repetition_penalty, watermark, chatbot_together, state_together, ],
-     #         [chatbot_together, state_together],)
-     b1.click( predict_glm,
-              [inputs, state_glm, ],
-              [chatbot_glm, state_glm],)

      b2.click(reset_chat, [chatbot_chatgpt, state_chatgpt], [chatbot_chatgpt, state_chatgpt])
      #b2.click(reset_chat, [chatbot_together, state_together], [chatbot_together, state_together])
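
For context, the translate_Chinese_English helper that predict_glm_stream calls on every yielded history is only partially visible above (its last two lines appear as diff context). A rough reconstruction under stated assumptions (the actual checkpoint loaded by app.py is not shown in this diff; a MarianMT zh-to-en model such as Helsinki-NLP/opus-mt-zh-en is assumed here):

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Assumption: a seq2seq zh-to-en checkpoint; app.py's real model may differ.
tokenizer_chtoen = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-zh-en")
model_chtoen = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-zh-en")

def translate_Chinese_English(chinese_text):
    inputs = tokenizer_chtoen(chinese_text, return_tensors="pt")
    generated_tokens = model_chtoen.generate(**inputs, max_new_tokens=512)
    trans_eng_text = tokenizer_chtoen.batch_decode(generated_tokens, skip_special_tokens=True)
    return trans_eng_text[0]

Because predict_glm_stream rebuilds and re-translates the whole history on each streamed token, this helper sits on the hot path of the stream.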