Spaces:

xu-song
/

kplug

Runtime error

App Files Community

xusong28 committed on Aug 24, 2022

Commit

a39e93b

•

1 Parent(s): 2bb0b26

update

Browse files

Files changed (4) hide show

app.py +5 -2
demo_corrector.py +46 -0
demo_mlm.py +8 -2
demo_sum.py +11 -7

app.py CHANGED Viewed

@@ -5,6 +5,10 @@
 """
 https://gradio.app/docs/#tabbedinterface-header
 """
 import gradio as gr
@@ -12,8 +16,7 @@ from demo_sum import sum_iface
 from demo_mlm import mlm_iface
-demo = gr.TabbedInterface([sum_iface, mlm_iface], ["商品摘要", "文本填词", "句子纠错"])
 if __name__ == "__main__":
     demo.launch()

 """
 https://gradio.app/docs/#tabbedinterface-header
+## 更多任务
+- 抽取式摘要
+- 检索式对话 、 抽取式问答
+-
 """
 import gradio as gr
 from demo_mlm import mlm_iface
+demo = gr.TabbedInterface([sum_iface, mlm_iface], ["生成式摘要", "文本填词", "句子纠错"])
 if __name__ == "__main__":
     demo.launch()

demo_corrector.py CHANGED Viewed

@@ -1,3 +1,49 @@
 # coding=utf-8
 # author: xusong <xusong28@jd.com>
 # time: 2022/8/23 17:08

 # coding=utf-8
 # author: xusong <xusong28@jd.com>
 # time: 2022/8/23 17:08
+import gradio as gr
+from transformers import FillMaskPipeline
+from transformers import BertTokenizer
+from modeling_kplug import KplugForMaskedLM
+model_dir = "models/pretrain/"
+tokenizer = BertTokenizer.from_pretrained(model_dir)
+model = KplugForMaskedLM.from_pretrained(model_dir)
+def correct(text):
+    pass
+# fill mask
+def fill_mask(text):
+    fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)
+    outputs = fill_masker(text)
+    return {i["token_str"]: i["score"] for i in outputs}
+mlm_examples = [
+    "这款连[MASK]裙真漂亮",
+    "这是杨[MASK]同款包包，精选优质皮料制作",
+    "美颜去痘洁面[MASK]",
+]
+mlm_iface = gr.Interface(
+    fn=fill_mask,
+    inputs=gr.inputs.Textbox(
+        label="输入文本",
+        default="这款连[MASK]裙真漂亮"),
+    outputs=gr.Label(
+        label="填词",
+        show_label=False,
+    ),
+    examples=mlm_examples,
+    title="文本填词",
+    description='<div>电商领域文本摘要， 基于KPLUG预训练语言模型。</div>'
+)
+if __name__ == "__main__":
+    # fill_mask("这款连[MASK]裙真漂亮")
+    mlm_iface.launch()

demo_mlm.py CHANGED Viewed

@@ -5,10 +5,16 @@
 """
 interface = gr.Interface.load(
     "models/bert-base-uncased", api_key=None, alias="fill-mask"
 )
 """
 import gradio as gr
@@ -44,8 +50,8 @@ mlm_iface = gr.Interface(
         show_label=False,
     ),
     examples=mlm_examples,
-    title="文本填词",
-    description='电商领域文本摘要， 基于KPLUG预训练语言模型，'
                 '<a href=""> K-PLUG: Knowledge-injected Pre-trained Language Model for Natural Language Understanding'
                 ' and Generation in E-Commerce (Findings of EMNLP 2021) </a>。'
 )

 """
+https://github.com/gradio-app/gradio/blob/299ba1bd1aed8040b3087c06c10fedf75901f91f/gradio/external.py#L484
 interface = gr.Interface.load(
     "models/bert-base-uncased", api_key=None, alias="fill-mask"
 )
+## TODO:
+1. json_output
+2. 百分数换成小数
+3.
 """
 import gradio as gr
         show_label=False,
     ),
     examples=mlm_examples,
+    title="文本填词（Fill Mask）",
+    description='基于KPLUG预训练语言模型，'
                 '<a href=""> K-PLUG: Knowledge-injected Pre-trained Language Model for Natural Language Understanding'
                 ' and Generation in E-Commerce (Findings of EMNLP 2021) </a>。'
 )

demo_sum.py CHANGED Viewed

@@ -20,6 +20,7 @@ model_dir = "models/ft_cepsum_jiadian/"
 model = BartForConditionalGeneration.from_pretrained(model_dir)  # cnn指的是cnn daily mail
 tokenizer = BertTokenizer.from_pretrained(model_dir)
 def summarize(text):
     inputs = tokenizer([text], max_length=512, return_tensors="pt")
     summary_ids = model.generate(inputs["input_ids"][:, 1:], num_beams=4, min_length=20, max_length=100)
@@ -27,7 +28,7 @@ def summarize(text):
     return summary[0]
-#TODO:
 #  1. 下拉框，选择类目。   gr.Radio(['服饰','箱包', '鞋靴']
 #  2. 支持NER、LM、Corrector
 # beam search参数
@@ -57,13 +58,16 @@ sum_iface = gr.Interface(
                 "建议左右两侧、顶部和背部需要预留10C，电源线和调平脚等。冰箱放置时为保证，菜谱推荐，半开"
                 "门俯视图，全开门俯视图，预留参考图"),
     outputs=gr.Textbox(
-            label="文本摘要（Summarization）",
-        ),
     examples=sum_examples,
-    title="电商文本摘要（Abstractive Summarization）",
-    description='电商领域文本摘要， 基于KPLUG预训练语言模型，'
-                '<a href=""> K-PLUG: Knowledge-injected Pre-trained Language Model for Natural Language Understanding'
-                ' and Generation in E-Commerce (Findings of EMNLP 2021) </a>。更多样例，见 https://github.com/xu-song/k-plug/tree/master/data_sample/sum/cepsum/jiadian/raw '
 )
 if __name__ == "__main__":

 model = BartForConditionalGeneration.from_pretrained(model_dir)  # cnn指的是cnn daily mail
 tokenizer = BertTokenizer.from_pretrained(model_dir)
 def summarize(text):
     inputs = tokenizer([text], max_length=512, return_tensors="pt")
     summary_ids = model.generate(inputs["input_ids"][:, 1:], num_beams=4, min_length=20, max_length=100)
     return summary[0]
+# TODO:
 #  1. 下拉框，选择类目。   gr.Radio(['服饰','箱包', '鞋靴']
 #  2. 支持NER、LM、Corrector
 # beam search参数
                 "建议左右两侧、顶部和背部需要预留10C，电源线和调平脚等。冰箱放置时为保证，菜谱推荐，半开"
                 "门俯视图，全开门俯视图，预留参考图"),
     outputs=gr.Textbox(
+        label="文本摘要（Summarization）",
+        lines=4,
+    ),
     examples=sum_examples,
+    title="生成式摘要（Abstractive Summarization）",
+    description='<div>这是一个生成式摘要的demo，用于电商领域的商品营销文案写作。'
+                '该demo基于KPLUG预训练语言模型，输入商品信息，输出商品的营销文案。</div>'
+                '<div> Paper: <a href="https://aclanthology.org/2021.findings-emnlp.1/"> K-PLUG: Knowledge-injected Pre-trained Language Model for Natural Language Understanding'
+                ' and Generation in E-Commerce (Findings of EMNLP 2021) </a> </div>'
+                '<div>Github: <a href="https://github.com/xu-song/k-plug">https://github.com/xu-song/k-plug </a> </div>'
 )
 if __name__ == "__main__":