ysharma HF staff committed on
Commit
a58f9b6
1 Parent(s): 76d05cb

update desc

Browse files
Files changed (1) hide show
  1. app.py +6 -8
app.py CHANGED
@@ -30,22 +30,21 @@ def predict(inputs, top_p, temperature, top_k, repetition_penalty, history=[]):
30
  }
31
 
32
  history.append(inputs)
 
33
  response = requests.post(API_URL, headers=headers, json=payload, stream=True)
34
  token_counter = 0
35
- partial_words = "" #inputs
 
36
  for chunk in response.iter_lines():
 
37
  if chunk:
38
- #print(chunk.decode())
39
  partial_words = partial_words + json.loads(chunk.decode()[5:])['token']['text']
40
- #print(partial_words)
41
- time.sleep(0.05)
42
- #print([(partial_words, "")])
43
  if token_counter == 0:
44
  history.append(" " + partial_words)
45
  else:
46
  history[-1] = partial_words
47
  chat = [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2) ] # convert to tuples of list
48
- #yield [(partial_words, history)]
49
  token_counter+=1
50
  yield chat, history #{chatbot: chat, state: history} #[(partial_words, history)]
51
 
@@ -59,7 +58,7 @@ User: <utterance>
59
  Assistant: <utterance>
60
  ...
61
  ```
62
- In this app, you can explore the outputs of a large language models.
63
  """
64
 
65
  with gr.Blocks(css = "#chatbot {height: 400px; overflow: auto;}") as demo:
@@ -83,4 +82,3 @@ with gr.Blocks(css = "#chatbot {height: 400px; overflow: auto;}") as demo:
83
 
84
  gr.Markdown(description)
85
  demo.queue().launch(debug=True)
86
-
 
30
  }
31
 
32
  history.append(inputs)
33
+ # make a POST request to the API endpoint using the requests.post method, passing in stream=True
34
  response = requests.post(API_URL, headers=headers, json=payload, stream=True)
35
  token_counter = 0
36
+ partial_words = ""
37
+ # loop over the response data using the iter_lines method of the response object
38
  for chunk in response.iter_lines():
39
+ # check whether each line is non-empty
40
  if chunk:
41
+ # decode each line as response data is in bytes
42
  partial_words = partial_words + json.loads(chunk.decode()[5:])['token']['text']
 
 
 
43
  if token_counter == 0:
44
  history.append(" " + partial_words)
45
  else:
46
  history[-1] = partial_words
47
  chat = [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2) ] # convert to tuples of list
 
48
  token_counter+=1
49
  yield chat, history #{chatbot: chat, state: history} #[(partial_words, history)]
50
 
 
58
  Assistant: <utterance>
59
  ...
60
  ```
61
+ In this app, you can explore the outputs of a 20B large language model.
62
  """
63
 
64
  with gr.Blocks(css = "#chatbot {height: 400px; overflow: auto;}") as demo:
 
82
 
83
  gr.Markdown(description)
84
  demo.queue().launch(debug=True)