ysharma HF staff committed on
Commit
eed9231
1 Parent(s): 2c12f3b

update code for real streaming

Browse files
Files changed (1) hide show
  1. app.py +45 -1
app.py CHANGED
@@ -7,7 +7,7 @@ import requests
7
  #Streaming endpoint
8
  API_URL = os.getenv("API_URL") + "/generate_stream"
9
 
10
- def predict(inputs, top_p, temperature, top_k, repetition_penalty, history=[]):
11
  if not inputs.startswith("User: "):
12
  inputs = "User: " + inputs + "\n"
13
  payload = {
@@ -48,6 +48,50 @@ def predict(inputs, top_p, temperature, top_k, repetition_penalty, history=[]):
48
 
49
  yield chat, history #resembles {chatbot: chat, state: history}
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  title = """<h1 align="center">Streaming your Chatbot output with Gradio</h1>"""
52
  description = """Language models can be conditioned to act like dialogue agents through a conversational prompt that typically takes the form:
53
  ```
 
7
  #Streaming endpoint
8
  API_URL = os.getenv("API_URL") + "/generate_stream"
9
 
10
+ def predict_old(inputs, top_p, temperature, top_k, repetition_penalty, history=[]):
11
  if not inputs.startswith("User: "):
12
  inputs = "User: " + inputs + "\n"
13
  payload = {
 
48
 
49
  yield chat, history #resembles {chatbot: chat, state: history}
50
 
51
+
52
def predict(inputs, top_p, temperature, top_k, repetition_penalty, history=None):
    """Stream a chatbot reply token-by-token from the generation endpoint.

    Generator: yields ``(chat, history)`` after each received token, where
    ``chat`` is a list of ``(user, bot)`` message tuples for the Gradio
    Chatbot component and ``history`` is the flat alternating message list
    kept as Gradio state.

    Parameters:
        inputs: the user's message; prefixed with "User: " if not already.
        top_p, temperature, top_k, repetition_penalty: sampling parameters
            forwarded verbatim to the generation endpoint.
        history: alternating user/bot messages from previous turns; a fresh
            list is created when omitted.
    """
    # Fix: the original default `history=[]` is a mutable default argument,
    # shared across calls — conversation state would leak between sessions.
    if history is None:
        history = []

    if not inputs.startswith("User: "):
        inputs = "User: " + inputs + "\n"

    payload = {
        "inputs": inputs,  # e.g. "My name is Jane and I"
        "parameters": {
            "details": True,
            "do_sample": True,
            "max_new_tokens": 100,
            "repetition_penalty": repetition_penalty,  # e.g. 1.03
            "seed": 0,  # fixed seed for reproducible sampling
            "temperature": temperature,  # e.g. 0.5
            "top_k": top_k,  # e.g. 10
            "top_p": top_p,  # e.g. 0.95
        },
    }

    headers = {
        'accept': 'text/event-stream',  # request server-sent-event streaming
        'Content-Type': 'application/json',
    }

    history.append(inputs)
    # NOTE(review): only API_URL is defined in the visible part of this
    # module — confirm API_URL2 is assigned elsewhere, otherwise this line
    # raises NameError.
    response = requests.post(API_URL2, headers=headers, json=payload, stream=True)

    token_counter = 0
    partial_words = ""  # accumulated bot reply so far
    for chunk in response.iter_lines():
        if chunk:  # skip SSE keep-alive blank lines
            # Each event line is b"data:{...json...}"; strip the 5-char
            # "data:" prefix before JSON-decoding the token payload.
            partial_words = partial_words + json.loads(chunk.decode()[5:])['token']['text']
            time.sleep(0.05)  # small delay so the UI update is visible
            if token_counter == 0:
                # First token of this bot turn: start a new history entry.
                history.append(" " + partial_words)
            else:
                # Later tokens: replace the in-progress bot entry.
                history[-1] = partial_words
            # Pair up alternating [user, bot, user, bot, ...] entries.
            chat = [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)]
            token_counter += 1
            yield chat, history  # resembles {chatbot: chat, state: history}
93
+
94
+
95
  title = """<h1 align="center">Streaming your Chatbot output with Gradio</h1>"""
96
  description = """Language models can be conditioned to act like dialogue agents through a conversational prompt that typically takes the form:
97
  ```