pseudotensor committed on
Commit
d5357c2
1 Parent(s): 31cc3ef

Update with h2oGPT hash c8a6d015ef453351b3c2dff18a23370f380278aa

Browse files
Files changed (2) hide show
  1. gradio_runner.py +2 -2
  2. prompter.py +2 -1
gradio_runner.py CHANGED
@@ -246,7 +246,7 @@ def go_gradio(**kwargs):
246
  value=kwargs['top_k'], label="Top k",
247
  info='Num. tokens to sample from'
248
  )
249
- max_beams = 8 if not is_low_mem else 2
250
  num_beams = gr.Slider(minimum=1, maximum=max_beams, step=1,
251
  value=min(max_beams, kwargs['num_beams']), label="Beams",
252
  info="Number of searches for optimal overall probability. "
@@ -865,7 +865,7 @@ def go_gradio(**kwargs):
865
  # FIXME: have to click once to stop output and second time to stop GPUs going
866
  stop_btn.click(lambda: None, None, None,
867
  cancels=[submit_event_nochat, submit_event, submit_event2, submit_event3],
868
- queue=False, api_name='stop' if allow_api else None).then(clear_torch_cache)
869
  demo.load(None, None, None, _js=get_dark_js() if kwargs['h2ocolors'] else None)
870
 
871
  demo.queue(concurrency_count=kwargs['concurrency_count'], api_open=kwargs['api_open'])
 
246
  value=kwargs['top_k'], label="Top k",
247
  info='Num. tokens to sample from'
248
  )
249
+ max_beams = 8 if not is_low_mem else 1
250
  num_beams = gr.Slider(minimum=1, maximum=max_beams, step=1,
251
  value=min(max_beams, kwargs['num_beams']), label="Beams",
252
  info="Number of searches for optimal overall probability. "
 
865
  # FIXME: have to click once to stop output and second time to stop GPUs going
866
  stop_btn.click(lambda: None, None, None,
867
  cancels=[submit_event_nochat, submit_event, submit_event2, submit_event3],
868
+ queue=False, api_name='stop' if allow_api else None).then(clear_torch_cache, queue=False)
869
  demo.load(None, None, None, _js=get_dark_js() if kwargs['h2ocolors'] else None)
870
 
871
  demo.queue(concurrency_count=kwargs['concurrency_count'], api_open=kwargs['api_open'])
prompter.py CHANGED
@@ -71,7 +71,8 @@ class Prompter(object):
71
  output = output.split(self.pre_response)[1]
72
  allow_terminate = True
73
  else:
74
- print("Failure of parsing or not enough output yet: %s" % output, flush=True)
 
75
  allow_terminate = False
76
  else:
77
  allow_terminate = True
 
71
  output = output.split(self.pre_response)[1]
72
  allow_terminate = True
73
  else:
74
+ if output:
75
+ print("Failure of parsing or not enough output yet: %s" % output, flush=True)
76
  allow_terminate = False
77
  else:
78
  allow_terminate = True