xusong28
committed on
Commit
•
d0547d2
1
Parent(s):
dc5d472
update
Browse files- demo_chatbot_jddc.py +4 -1
- demo_corrector.py +4 -2
- demo_sum.py +4 -0
demo_chatbot_jddc.py
CHANGED
@@ -17,11 +17,14 @@ tokenizer = BertTokenizer.from_pretrained("eson/kplug-base-jddc")
|
|
17 |
|
18 |
|
19 |
def predict(input, history=[]):
|
|
|
|
|
|
|
20 |
# append the new user input tokens to the chat history
|
21 |
history = history + [input] # history如果包含错误的response,可能会造成误差传递
|
22 |
|
23 |
# tokenize the new input sentence
|
24 |
-
bot_input_ids = tokenizer.encode("".join(history)[-500:], return_tensors='pt')
|
25 |
|
26 |
# bot_input_ids = torch.cat([torch.LongTensor(history), new_user_input_ids], dim=-1)
|
27 |
|
17 |
|
18 |
|
19 |
def predict(input, history=[]):
|
20 |
+
"""
|
21 |
+
拼接方案:直接拼接history作为输入,不区分角色。虽然简单粗糙,但是encoder-decoder架构不会混淆输入和输出(如果是gpt架构就需要区分角色了)。
|
22 |
+
"""
|
23 |
# append the new user input tokens to the chat history
|
24 |
history = history + [input] # history如果包含错误的response,可能会造成误差传递
|
25 |
|
26 |
# tokenize the new input sentence
|
27 |
+
bot_input_ids = tokenizer.encode("".join(history)[-500:], return_tensors='pt')
|
28 |
|
29 |
# bot_input_ids = torch.cat([torch.LongTensor(history), new_user_input_ids], dim=-1)
|
30 |
|
demo_corrector.py
CHANGED
@@ -52,7 +52,9 @@ def mock_data():
|
|
52 |
|
53 |
|
54 |
def correct(sent):
|
55 |
-
|
|
|
|
|
56 |
corrected_sent, errs = corrector.bert_correct(sent)
|
57 |
# corrected_sent, errs = mock_data()
|
58 |
print("original sentence:{} => {}, err:{}".format(sent, corrected_sent, errs))
|
@@ -79,7 +81,7 @@ corr_iface = gr.Interface(
|
|
79 |
|
80 |
),
|
81 |
gr.JSON(
|
82 |
-
label="JSON Output"
|
83 |
)
|
84 |
],
|
85 |
examples=error_sentences,
|
52 |
|
53 |
|
54 |
def correct(sent):
|
55 |
+
"""
|
56 |
+
{"text": sent, "entities": [{}, {}] } 是 gradio 要求的格式,详见 https://www.gradio.app/docs/highlightedtext
|
57 |
+
"""
|
58 |
corrected_sent, errs = corrector.bert_correct(sent)
|
59 |
# corrected_sent, errs = mock_data()
|
60 |
print("original sentence:{} => {}, err:{}".format(sent, corrected_sent, errs))
|
81 |
|
82 |
),
|
83 |
gr.JSON(
|
84 |
+
# label="JSON Output"
|
85 |
)
|
86 |
],
|
87 |
examples=error_sentences,
|
demo_sum.py
CHANGED
@@ -109,10 +109,14 @@ gen_mode_params = {
|
|
109 |
"num_beams": 10,
|
110 |
"do_sample": False,
|
111 |
},
|
|
|
|
|
112 |
"contrastive search": {
|
113 |
"top_k": 4,
|
114 |
"penalty_alpha": 0.2,
|
115 |
},
|
|
|
|
|
116 |
"diverse beam search": {
|
117 |
"num_beams": 5,
|
118 |
"num_beam_groups": 5,
|
109 |
"num_beams": 10,
|
110 |
"do_sample": False,
|
111 |
},
|
112 |
+
|
113 |
+
# 算法? 复杂度?
|
114 |
"contrastive search": {
|
115 |
"top_k": 4,
|
116 |
"penalty_alpha": 0.2,
|
117 |
},
|
118 |
+
|
119 |
+
# 算法? 复杂度?
|
120 |
"diverse beam search": {
|
121 |
"num_beams": 5,
|
122 |
"num_beam_groups": 5,
|