pseudotensor committed on
Commit 09d4719
1 Parent(s): 7246b62

Update with h2oGPT hash 27616ac37a45f19994b9d1893953791e1644b3f1

Files changed (3)
  1. app.py +107 -89
  2. client_test.py +56 -28
  3. finetune.py +7 -1
app.py CHANGED
@@ -83,6 +83,10 @@ def main(
     # set to True to load --base_model after client logs in,
     # to be able to free GPU memory when model is swapped
     login_mode_if_model0: bool = False,
+    block_gradio_exit: bool = True,
+    concurrency_count: int = 1,
+    api_open: bool = False,  # don't let API skip queue
+    allow_api: bool = True,
 
     sanitize_user_prompt: bool = True,
     sanitize_bot_response: bool = True,
@@ -116,6 +120,12 @@ def main(
 # must override share if in spaces
 share = False
 save_dir = os.getenv('SAVE_DIR', save_dir)
+score_model = os.getenv('SCORE_MODEL', score_model)
+if score_model == 'None':
+    score_model = ''
+concurrency_count = int(os.getenv('CONCURRENCY_COUNT', concurrency_count))
+api_open = bool(int(os.getenv('API_OPEN', api_open)))
+allow_api = bool(int(os.getenv('ALLOW_API', allow_api)))
 
 # get defaults
 model_lower = base_model.lower()
@@ -166,7 +176,7 @@ def main(
 assert data[i]['conversations'][turn_start + 1]['from'] == 'gpt'
 output = data[i]['conversations'][turn_start + 1]['value']
 examplenew = example1.copy()
-assert not chat, "No gradio must use chat=False, uses nochat isntruct"
+assert not chat, "No gradio must use chat=False, uses nochat instruct"
 examplenew[eval_func_param_names.index('instruction_nochat')] = instruction
 examplenew[eval_func_param_names.index('iinput_nochat')] = ''  # no input
 examplenew[eval_func_param_names.index('context')] = ''  # no context
@@ -528,6 +538,7 @@ def get_score_model(**kwargs):
 
 def go_gradio(**kwargs):
     # get default model
+    allow_api = kwargs['allow_api']
     all_kwargs = kwargs.copy()
     all_kwargs.update(locals())
     if kwargs.get('base_model') and not kwargs['login_mode_if_model0']:
@@ -726,12 +737,12 @@ body.dark{background:linear-gradient(#0d0d0d,#333333);}"""
     placeholder=kwargs['placeholder_input'])
 submit_nochat = gr.Button("Submit")
 flag_btn_nochat = gr.Button("Flag")
-if kwargs['score_model']:
-    if not kwargs['auto_score']:
-        with gr.Column():
-            score_btn_nochat = gr.Button("Score last prompt & response")
-            score_text_nochat = gr.Textbox("Response Score: NA", show_label=False)
-    else:
+if not kwargs['auto_score']:
+    with gr.Column(visible=kwargs['score_model']):
+        score_btn_nochat = gr.Button("Score last prompt & response")
+        score_text_nochat = gr.Textbox("Response Score: NA", show_label=False)
+else:
+    with gr.Column(visible=kwargs['score_model']):
         score_text_nochat = gr.Textbox("Response Score: NA", show_label=False)
 col_chat = gr.Column(visible=kwargs['chat'])
 with col_chat:
@@ -751,19 +762,19 @@ body.dark{background:linear-gradient(#0d0d0d,#333333);}"""
 with gr.Row():
     clear = gr.Button("New Conversation")
     flag_btn = gr.Button("Flag")
-if kwargs['score_model']:
-    if not kwargs['auto_score']:  # FIXME: For checkbox model2
-        with gr.Column():
-            with gr.Row():
-                score_btn = gr.Button("Score last prompt & response").style(
-                    full_width=False, size='sm')
-                score_text = gr.Textbox("Response Score: NA", show_label=False)
-            score_res2 = gr.Row(visible=False)
-            with score_res2:
-                score_btn2 = gr.Button("Score last prompt & response 2").style(
-                    full_width=False, size='sm')
-                score_text2 = gr.Textbox("Response Score2: NA", show_label=False)
-    else:
+if not kwargs['auto_score']:  # FIXME: For checkbox model2
+    with gr.Column(visible=kwargs['score_model']):
+        with gr.Row():
+            score_btn = gr.Button("Score last prompt & response").style(
+                full_width=False, size='sm')
+            score_text = gr.Textbox("Response Score: NA", show_label=False)
+        score_res2 = gr.Row(visible=False)
+        with score_res2:
+            score_btn2 = gr.Button("Score last prompt & response 2").style(
+                full_width=False, size='sm')
+            score_text2 = gr.Textbox("Response Score2: NA", show_label=False)
+else:
+    with gr.Column(visible=kwargs['score_model']):
         score_text = gr.Textbox("Response Score: NA", show_label=False)
         score_text2 = gr.Textbox("Response Score2: NA", show_label=False, visible=False)
 retry = gr.Button("Regenerate")
@@ -942,7 +953,6 @@ body.dark{background:linear-gradient(#0d0d0d,#333333);}"""
 fun = partial(evaluate,
     **kwargs_evaluate)
 fun2 = partial(evaluate,
-    model_state2,
     **kwargs_evaluate)
 
 dark_mode_btn = gr.Button("Dark Mode", variant="primary").style(
@@ -953,7 +963,7 @@ body.dark{background:linear-gradient(#0d0d0d,#333333);}"""
     None,
     None,
     _js=dark_js,
-    api_name="dark",
+    api_name="dark" if allow_api else None,
 )
 
 # Control chat and non-chat blocks, which can be independently used by chat checkbox swap
@@ -966,7 +976,7 @@ body.dark{background:linear-gradient(#0d0d0d,#333333);}"""
 def context_fun(x):
     return gr.Textbox.update(visible=not x)
 
-chat.select(col_nochat_fun, chat, col_nochat, api_name="chat_checkbox") \
+chat.select(col_nochat_fun, chat, col_nochat, api_name="chat_checkbox" if allow_api else None) \
     .then(col_chat_fun, chat, col_chat) \
     .then(context_fun, chat, context)
 
@@ -1042,25 +1052,31 @@ body.dark{background:linear-gradient(#0d0d0d,#333333);}"""
 os.environ['TOKENIZERS_PARALLELISM'] = 'true'
 return 'Response Score: {:.1%}'.format(score)
 
+def noop_score_last_response(*args, **kwargs):
+    return "Response Score: Disabled"
 if kwargs['score_model']:
-    score_args = dict(fn=score_last_response,
-        inputs=inputs_list + [text_output],
-        outputs=[score_text],
-        )
-    score_args2 = dict(fn=partial(score_last_response, model2=True),
-        inputs=inputs_list + [text_output2],
-        outputs=[score_text2],
-        )
+    score_fun = score_last_response
+else:
+    score_fun = noop_score_last_response
+
+score_args = dict(fn=score_fun,
+    inputs=inputs_list + [text_output],
+    outputs=[score_text],
+    )
+score_args2 = dict(fn=partial(score_fun, model2=True),
+    inputs=inputs_list + [text_output2],
+    outputs=[score_text2],
+    )
 
-    score_args_nochat = dict(fn=partial(score_last_response, nochat=True),
-        inputs=inputs_list + [text_output_nochat],
-        outputs=[score_text_nochat],
-        )
-    if not kwargs['auto_score']:
-        score_event = score_btn.click(**score_args, queue=stream_output, api_name='score') \
-            .then(**score_args2, queue=stream_output, api_name='score2')
-    score_event_nochat = score_btn_nochat.click(**score_args_nochat, queue=stream_output,
-        api_name='score_nochat')
+score_args_nochat = dict(fn=partial(score_fun, nochat=True),
+    inputs=inputs_list + [text_output_nochat],
+    outputs=[score_text_nochat],
+    )
+if not kwargs['auto_score']:
+    score_event = score_btn.click(**score_args, queue=stream_output, api_name='score' if allow_api else None) \
+        .then(**score_args2, queue=stream_output, api_name='score2' if allow_api else None)
+score_event_nochat = score_btn_nochat.click(**score_args_nochat, queue=stream_output,
+    api_name='score_nochat' if allow_api else None)
 
 def user(*args, undo=False, sanitize_user_prompt=True, model2=False):
     """
@@ -1208,64 +1224,64 @@ body.dark{background:linear-gradient(#0d0d0d,#333333);}"""
 if kwargs['auto_score']:
     # in case 2nd model, consume instruction first, so can clear quickly
     # bot doesn't consume instruction itself, just history from user, so why works
-    submit_event = instruction.submit(**user_args, queue=stream_output, api_name='instruction') \
-        .then(**user_args2, queue=stream_output, api_name='instruction2') \
+    submit_event = instruction.submit(**user_args, queue=stream_output, api_name='instruction' if allow_api else None) \
+        .then(**user_args2, queue=stream_output, api_name='instruction2' if allow_api else None) \
         .then(clear_instruct, None, instruction) \
-        .then(**bot_args, api_name='instruction_bot') \
-        .then(**score_args, api_name='instruction_bot_score') \
-        .then(**bot_args2, api_name='instruction_bot2') \
-        .then(**score_args2, api_name='instruction_bot_score2') \
+        .then(**bot_args, api_name='instruction_bot' if allow_api else None) \
+        .then(**score_args, api_name='instruction_bot_score' if allow_api else None) \
+        .then(**bot_args2, api_name='instruction_bot2' if allow_api else None) \
+        .then(**score_args2, api_name='instruction_bot_score2' if allow_api else None) \
        .then(clear_torch_cache)
-    submit_event2 = submit.click(**user_args, queue=stream_output, api_name='submit') \
-        .then(**user_args2, queue=stream_output, api_name='submit2') \
-        .then(**bot_args, api_name='submit_bot') \
+    submit_event2 = submit.click(**user_args, queue=stream_output, api_name='submit' if allow_api else None) \
+        .then(**user_args2, queue=stream_output, api_name='submit2' if allow_api else None) \
+        .then(**bot_args, api_name='submit_bot' if allow_api else None) \
        .then(clear_instruct, None, instruction) \
-        .then(**score_args, api_name='submit_bot_score') \
-        .then(**bot_args2, api_name='submit_bot2') \
-        .then(**score_args2, api_name='submit_bot_score2') \
+        .then(**score_args, api_name='submit_bot_score' if allow_api else None) \
+        .then(**bot_args2, api_name='submit_bot2' if allow_api else None) \
+        .then(**score_args2, api_name='submit_bot_score2' if allow_api else None) \
        .then(clear_torch_cache)
-    submit_event3 = retry.click(**user_args, queue=stream_output, api_name='retry') \
-        .then(**user_args2, queue=stream_output, api_name='retry2') \
+    submit_event3 = retry.click(**user_args, queue=stream_output, api_name='retry' if allow_api else None) \
+        .then(**user_args2, queue=stream_output, api_name='retry2' if allow_api else None) \
        .then(clear_instruct, None, instruction) \
-        .then(**retry_bot_args, api_name='retry_bot') \
-        .then(**score_args, api_name='retry_bot_score') \
-        .then(**retry_bot_args2, api_name='retry_bot2') \
-        .then(**score_args2, api_name='retry_bot_score2') \
+        .then(**retry_bot_args, api_name='retry_bot' if allow_api else None) \
+        .then(**score_args, api_name='retry_bot_score' if allow_api else None) \
+        .then(**retry_bot_args2, api_name='retry_bot2' if allow_api else None) \
+        .then(**score_args2, api_name='retry_bot_score2' if allow_api else None) \
        .then(clear_torch_cache)
-    submit_event4 = undo.click(**undo_user_args, queue=stream_output, api_name='undo') \
-        .then(**score_args, api_name='undo_score') \
-        .then(**undo_user_args2, queue=stream_output, api_name='undo2') \
-        .then(**score_args2, api_name='undo_score2') \
+    submit_event4 = undo.click(**undo_user_args, queue=stream_output, api_name='undo' if allow_api else None) \
+        .then(**score_args, api_name='undo_score' if allow_api else None) \
+        .then(**undo_user_args2, queue=stream_output, api_name='undo2' if allow_api else None) \
+        .then(**score_args2, api_name='undo_score2' if allow_api else None) \
        .then(clear_instruct, None, instruction)
 else:
-    submit_event = instruction.submit(**user_args, queue=stream_output, api_name='instruction') \
-        .then(**user_args2, queue=stream_output, api_name='instruction2') \
+    submit_event = instruction.submit(**user_args, queue=stream_output, api_name='instruction' if allow_api else None) \
+        .then(**user_args2, queue=stream_output, api_name='instruction2' if allow_api else None) \
        .then(clear_instruct, None, instruction) \
-        .then(**bot_args, api_name='instruction_bot') \
-        .then(**bot_args2, api_name='instruction_bot2') \
+        .then(**bot_args, api_name='instruction_bot' if allow_api else None) \
+        .then(**bot_args2, api_name='instruction_bot2' if allow_api else None) \
        .then(clear_torch_cache)
-    submit_event2 = submit.click(**user_args, queue=stream_output, api_name='submit') \
-        .then(**user_args2, queue=stream_output, api_name='submit2') \
+    submit_event2 = submit.click(**user_args, queue=stream_output, api_name='submit' if allow_api else None) \
+        .then(**user_args2, queue=stream_output, api_name='submit2' if allow_api else None) \
        .then(clear_instruct, None, instruction) \
-        .then(**bot_args, api_name='submit_bot') \
-        .then(**bot_args2, api_name='submit_bot2') \
+        .then(**bot_args, api_name='submit_bot' if allow_api else None) \
+        .then(**bot_args2, api_name='submit_bot2' if allow_api else None) \
        .then(clear_torch_cache)
-    submit_event3 = retry.click(**user_args, queue=stream_output, api_name='retry') \
-        .then(**user_args2, queue=stream_output, api_name='retry2') \
+    submit_event3 = retry.click(**user_args, queue=stream_output, api_name='retry' if allow_api else None) \
+        .then(**user_args2, queue=stream_output, api_name='retry2' if allow_api else None) \
        .then(clear_instruct, None, instruction) \
-        .then(**retry_bot_args, api_name='retry_bot') \
-        .then(**retry_bot_args2, api_name='retry_bot2') \
+        .then(**retry_bot_args, api_name='retry_bot' if allow_api else None) \
+        .then(**retry_bot_args2, api_name='retry_bot2' if allow_api else None) \
        .then(clear_torch_cache)
-    submit_event4 = undo.click(**undo_user_args, queue=stream_output, api_name='undo') \
-        .then(**undo_user_args2, queue=stream_output, api_name='undo2')
+    submit_event4 = undo.click(**undo_user_args, queue=stream_output, api_name='undo' if allow_api else None) \
+        .then(**undo_user_args2, queue=stream_output, api_name='undo2' if allow_api else None)
 
 # does both models
-clear.click(lambda: None, None, text_output, queue=False, api_name='clear') \
-    .then(lambda: None, None, text_output2, queue=False, api_name='clear2')
+clear.click(lambda: None, None, text_output, queue=False, api_name='clear' if allow_api else None) \
+    .then(lambda: None, None, text_output2, queue=False, api_name='clear2' if allow_api else None)
 # FIXME: compare
 submit_event_nochat = submit_nochat.click(fun, inputs=[model_state] + inputs_list,
-    outputs=text_output_nochat, api_name='submit_nochat') \
-    .then(**score_args_nochat, api_name='instruction_bot_score_nochat') \
+    outputs=text_output_nochat, api_name='submit_nochat' if allow_api else None) \
+    .then(**score_args_nochat, api_name='instruction_bot_score_nochat' if allow_api else None) \
     .then(clear_torch_cache)
 
 def load_model(model_name, lora_weights, model_state_old, prompt_type_old, load_8bit, infer_devices, gpu_id):
@@ -1380,7 +1396,7 @@ body.dark{background:linear-gradient(#0d0d0d,#333333);}"""
     inputs=[lora_options_state, new_lora, model_used, lora_used, model_used2, lora_used2],
     outputs=[lora_choice, lora_choice2, new_lora, lora_options_state])
 
-go_btn.click(lambda: gr.update(visible=False), None, go_btn, api_name="go") \
+go_btn.click(lambda: gr.update(visible=False), None, go_btn, api_name="go" if allow_api else None) \
    .then(lambda: gr.update(visible=True), None, normal_block) \
    .then(**load_model_args).then(**prompt_update_args)
 
@@ -1393,7 +1409,8 @@ body.dark{background:linear-gradient(#0d0d0d,#333333);}"""
 def compare_prompt_fun(x):
     return gr.Dropdown.update(visible=x)
 
-compare_checkbox.select(compare_textbox_fun, compare_checkbox, text_output2, api_name="compare_checkbox") \
+compare_checkbox.select(compare_textbox_fun, compare_checkbox, text_output2,
+    api_name="compare_checkbox" if allow_api else None) \
    .then(compare_column_fun, compare_checkbox, col_model2) \
    .then(compare_prompt_fun, compare_checkbox, prompt_type2) \
    .then(compare_textbox_fun, compare_checkbox, score_text2)
@@ -1402,28 +1419,29 @@ body.dark{background:linear-gradient(#0d0d0d,#333333);}"""
 # callback for logging flagged input/output
 callback.setup(inputs_list + [text_output], "flagged_data_points")
 flag_btn.click(lambda *args: callback.flag(args), inputs_list + [text_output], None, preprocess=False,
-    api_name='flag')
+    api_name='flag' if allow_api else None)
 flag_btn_nochat.click(lambda *args: callback.flag(args), inputs_list + [text_output], None, preprocess=False,
-    api_name='flag_nochat')
+    api_name='flag_nochat' if allow_api else None)
 
 def get_system_info():
     return gr.Textbox.update(value=system_info_print())
 
-system_event = system_btn.click(get_system_info, outputs=system_text, api_name='system_info')
+system_event = system_btn.click(get_system_info, outputs=system_text, api_name='system_info' if allow_api else None)
 
 # don't pass text_output, don't want to clear output, just stop it
 # FIXME: have to click once to stop output and second time to stop GPUs going
 stop_btn.click(lambda: None, None, None,
     cancels=[submit_event_nochat, submit_event, submit_event2, submit_event3],
-    queue=False, api_name='stop').then(clear_torch_cache)
-demo.load(None,None,None,_js=dark_js)
+    queue=False, api_name='stop' if allow_api else None).then(clear_torch_cache)
+demo.load(None, None, None, _js=dark_js)
 
-demo.queue(concurrency_count=1)
+demo.queue(concurrency_count=kwargs['concurrency_count'], api_open=kwargs['api_open'])
 favicon_path = "h2o-logo.svg"
 demo.launch(share=kwargs['share'], server_name="0.0.0.0", show_error=True,
     favicon_path=favicon_path, prevent_thread_lock=True)  # , enable_queue=True)
 print("Started GUI", flush=True)
-demo.block_thread()
+if kwargs['block_gradio_exit']:
+    demo.block_thread()
 
 
 input_args_list = ['model_state']
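
The new environment-variable overrides in main() (SCORE_MODEL, CONCURRENCY_COUNT, API_OPEN, ALLOW_API) are what let a hosted Space control this app without code changes. Below is a minimal launcher sketch, not part of the commit, showing one way they could be exercised locally; the values are illustrative only.

# Hypothetical launcher sketch: sets the environment variables that the new
# os.getenv() calls in main() read, then starts app.py in a subprocess.
import os
import subprocess

env = dict(os.environ)
env.update({
    'SCORE_MODEL': 'None',     # mapped to '' in main(), i.e. no reward/score model
    'CONCURRENCY_COUNT': '1',  # forwarded to demo.queue(concurrency_count=...)
    'API_OPEN': '0',           # queued API calls may not skip the queue
    'ALLOW_API': '1',          # keep named api_name endpoints registered
})
subprocess.run(['python', 'app.py'], env=env, check=True)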
client_test.py CHANGED
@@ -13,43 +13,69 @@ Currently, this will force model to be on a single GPU.
 Then run this client as:
 
 python client_test.py
+
+
+
+For HF spaces:
+
+HOST="https://h2oai-h2ogpt-chatbot.hf.space" python client_test.py
+
+Result:
+
+Loaded as API: https://h2oai-h2ogpt-chatbot.hf.space ✔
+{'instruction_nochat': 'Who are you?', 'iinput_nochat': '', 'response': 'I am h2oGPT, a large language model developed by LAION.'}
+
+
+For demo:
+
+HOST="https://gpt.h2o.ai" python client_test.py
+
+Result:
+
+Loaded as API: https://gpt.h2o.ai ✔
+{'instruction_nochat': 'Who are you?', 'iinput_nochat': '', 'response': 'I am h2oGPT, a chatbot created by LAION.'}
+
 """
 
 debug = False
 
 import os
 os.environ['HF_HUB_DISABLE_TELEMETRY'] = '1'
-from gradio_client import Client
-
-client = Client("http://localhost:7860")
-if debug:
-    print(client.view_api(all_endpoints=True))
-
-instruction = ''  # only for chat=True
-iinput = ''  # only for chat=True
-context = ''
-# streaming output is supported, loops over and outputs each generation in streaming mode
-# but leave stream_output=False for simple input/output mode
-stream_output = False
-prompt_type = 'human_bot'
-temperature = 0.1
-top_p = 0.75
-top_k = 40
-num_beams = 1
-max_new_tokens = 50
-min_new_tokens = 0
-early_stopping = False
-max_time = 20
-repetition_penalty = 1.0
-num_return_sequences = 1
-do_sample = True
-# only these 2 below used if pass chat=False
-chat = False
-instruction_nochat = "Who are you?"
-iinput_nochat = ''
+
+
+def get_client():
+    from gradio_client import Client
+
+    client = Client(os.getenv('HOST', "http://localhost:7860"))
+    if debug:
+        print(client.view_api(all_endpoints=True))
+    return client
 
 
 def test_client_basic():
+    instruction = ''  # only for chat=True
+    iinput = ''  # only for chat=True
+    context = ''
+    # streaming output is supported, loops over and outputs each generation in streaming mode
+    # but leave stream_output=False for simple input/output mode
+    stream_output = False
+    prompt_type = 'human_bot'
+    temperature = 0.1
+    top_p = 0.75
+    top_k = 40
+    num_beams = 1
+    max_new_tokens = 50
+    min_new_tokens = 0
+    early_stopping = False
+    max_time = 20
+    repetition_penalty = 1.0
+    num_return_sequences = 1
+    do_sample = True
+    # only these 2 below used if pass chat=False
+    chat = False
+    instruction_nochat = "Who are you?"
+    iinput_nochat = ''
+
     args = [instruction,
         iinput,
         context,
@@ -71,12 +97,14 @@ def test_client_basic():
         iinput_nochat,
         ]
     api_name = '/submit_nochat'
+    client = get_client()
     res = client.predict(
         *tuple(args),
         api_name=api_name,
     )
     res_dict = dict(instruction_nochat=instruction_nochat, iinput_nochat=iinput_nochat, response=md_to_text(res))
     print(res_dict)
+    return res_dict
 
 
 import markdown  # pip install markdown
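
Since get_client() now resolves the server from the HOST environment variable at call time, the updated test can also be driven from another script. A small sketch, not part of the commit; it assumes a server is already running at the chosen URL.

# Hypothetical driver sketch for the updated client_test.py.
import os

os.environ['HOST'] = 'http://localhost:7860'  # or an HF Space / https://gpt.h2o.ai

from client_test import test_client_basic

res_dict = test_client_basic()  # now returns the result dict in addition to printing it
print(res_dict['response'])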
finetune.py CHANGED
@@ -765,7 +765,13 @@ Current Time: {}
 
 PreInput = None
 
-PreResponse = bot
+if reduced:
+    # when making context, want it to appear as-if LLM generated, which starts with space after :
+    PreResponse = bot + ' '
+else:
+    # normally LLM adds space after this, because was how trained.
+    # if add space here, non-unique tokenization will often make LLM produce wrong output
+    PreResponse = bot
 
 terminate_response = [start, PreResponse]
 elif prompt_type in [3, "3", "dai_faq"]:
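
The comment about non-unique tokenization is the crux of this change: whether the space after the bot tag belongs to the prompt or to the generation changes the token ids at the boundary. A quick illustration, not part of the commit, using an arbitrary tokenizer and a stand-in bot tag; both are examples only.

# Hypothetical illustration of the tokenization point in the comments above.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained('gpt2')  # example tokenizer only

bot = '<bot>:'            # stand-in for the PreResponse tag
reply = ' I am a chatbot.'

# Training-time convention (reduced False): space stays on the generation side.
print(tok.encode(bot), tok.encode(reply))
# Context-building convention (reduced True): space moves onto the prompt side.
print(tok.encode(bot + ' '), tok.encode(reply.lstrip()))
# The two splits generally do not yield the same tokens around the boundary,
# which is why the prompt omits the trailing space when the model is expected
# to generate it.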