Spaces:

xu-song
/

kplug

Runtime error

App Files Community

xusong28 committed on Sep 19, 2022

Commit

19fb2f0

•

1 Parent(s): c10350f

update

Browse files

Files changed (3) hide show

app.py +15 -66
app2.py +0 -22
demo_corrector.py +8 -7

app.py CHANGED Viewed

@@ -1,74 +1,23 @@
 # coding=utf-8
 # author: xusong <xusong28@jd.com>
-# time: 2022/8/26 13:14
-import gradio as gr
-import operator
-import torch
-from transformers import BertTokenizer, BertForMaskedLM
-tokenizer = BertTokenizer.from_pretrained("shibing624/macbert4csc-base-chinese")
-model = BertForMaskedLM.from_pretrained("shibing624/macbert4csc-base-chinese")
-def ai_text(text):
-    with torch.no_grad():
-        outputs = model(**tokenizer([text], padding=True, return_tensors='pt'))
-    def to_ner(corrected_sent, errs):
-        output = [{"entity": "纠错", "word": err[1], "start": err[2], "end": err[3]} for i, err in
-                  enumerate(errs)]
-        return {"text": corrected_sent, "entities": output}
-    def get_errors(corrected_text, origin_text):
-        sub_details = []
-        for i, ori_char in enumerate(origin_text):
-            if ori_char in [' ', '“', '”', '‘', '’', '琊', '\n', '…', '—', '擤']:
-                # add unk word
-                corrected_text = corrected_text[:i] + ori_char + corrected_text[i:]
-                continue
-            if i >= len(corrected_text):
-                continue
-            if ori_char != corrected_text[i]:
-                if ori_char.lower() == corrected_text[i]:
-                    # pass english upper char
-                    corrected_text = corrected_text[:i] + ori_char + corrected_text[i + 1:]
-                    continue
-                sub_details.append((ori_char, corrected_text[i], i, i + 1))
-        sub_details = sorted(sub_details, key=operator.itemgetter(2))
-        return corrected_text, sub_details
-    _text = tokenizer.decode(torch.argmax(outputs.logits[0], dim=-1), skip_special_tokens=True).replace(' ', '')
-    corrected_text = _text[:len(text)]
-    corrected_text, details = get_errors(corrected_text, text)
-    print(text, ' => ', corrected_text, details)
-    return to_ner(corrected_text, details), details
-if __name__ == '__main__':
-    print(ai_text('少先队员因该为老人让坐'))
-    examples = [
-        ['真麻烦你了。希望你们好好的跳无'],
-        ['少先队员因该为老人让坐'],
-        ['机七学习是人工智能领遇最能体现智能的一个分知'],
-        ['今天心情很好'],
-        ['他法语说的很好，的语也不错'],
-        ['他们的吵翻很不错，再说他们做的咖喱鸡也好吃'],
-    ]
-    gr.Interface(
-        ai_text,
-        inputs="textbox",
-        outputs=[
-            gr.outputs.HighlightedText(
-                label="Output",
-                show_legend=True,
-            ),
-            gr.outputs.JSON()
-        ],
-        title="Chinese Spelling Correction Model shibing624/macbert4csc-base-chinese",
-        description="Copy or input error Chinese text. Submit and the machine will correct text.",
-        article="Link to <a href='https://github.com/shibing624/pycorrector' style='color:blue;' target='_blank\'>Github REPO</a>",
-        examples=examples).launch()

 # coding=utf-8
 # author: xusong <xusong28@jd.com>
+# time: 2022/8/23 16:06
+"""
+https://gradio.app/docs/#tabbedinterface-header
+## 更多任务
+- 抽取式摘要
+- 检索式对话 、 抽取式问答
+-
+"""
+import gradio as gr
+from demo_sum import sum_iface
+from demo_mlm import mlm_iface
+from demo_corrector import corr_iface
+demo = gr.TabbedInterface([sum_iface, mlm_iface, corr_iface], ["生成式摘要", "文本填词", "句子纠错"])
+if __name__ == "__main__":
+    demo.launch()

app2.py DELETED Viewed

@@ -1,22 +0,0 @@
-# coding=utf-8
-# author: xusong <xusong28@jd.com>
-# time: 2022/8/23 16:06
-"""
-https://gradio.app/docs/#tabbedinterface-header
-## 更多任务
-- 抽取式摘要
-- 检索式对话 、 抽取式问答
--
-"""
-import gradio as gr
-from demo_sum import sum_iface
-from demo_mlm import mlm_iface
-demo = gr.TabbedInterface([sum_iface, mlm_iface], ["生成式摘要", "文本填词", "句子纠错"])
-if __name__ == "__main__":
-    demo.launch()

demo_corrector.py CHANGED Viewed

@@ -32,7 +32,7 @@ class KplugCorrector(BertCorrector):
             logger.debug('Loaded bert model: %s, spend: %.3f s.' % (bert_model_dir, time.time() - t1))
-# corrector = KplugCorrector()
 error_sentences = [
     '少先队员因该为老人让坐',
@@ -49,8 +49,8 @@ def mock_data():
 def correct(sent):
-    # corrected_sent, errs = corrector.bert_correct(sent)
-    corrected_sent, errs = mock_data()
     print("original sentence:{} => {}, err:{}".format(sent, corrected_sent, errs))
     output = [{"entity": "纠错", "score": 0.5, "word": err[1], "start": err[2], "end": err[3]} for i, err in
               enumerate(errs)]
@@ -69,12 +69,13 @@ corr_iface = gr.Interface(
         label="输入文本",
         default="少先队员因该为老人让坐"),
     outputs=[
-        gr.HighlightedText(
-            label="纠错",
             show_legend=True,
-            # visible=False
         ),
-        gr.JSON()
     ],
     examples=error_sentences,
     title="文本纠错（Corrector）",

             logger.debug('Loaded bert model: %s, spend: %.3f s.' % (bert_model_dir, time.time() - t1))
+corrector = KplugCorrector()
 error_sentences = [
     '少先队员因该为老人让坐',
 def correct(sent):
+    corrected_sent, errs = corrector.bert_correct(sent)
+    # corrected_sent, errs = mock_data()
     print("original sentence:{} => {}, err:{}".format(sent, corrected_sent, errs))
     output = [{"entity": "纠错", "score": 0.5, "word": err[1], "start": err[2], "end": err[3]} for i, err in
               enumerate(errs)]
         label="输入文本",
         default="少先队员因该为老人让坐"),
     outputs=[
+        gr.outputs.HighlightedText(
+            label="Output",
             show_legend=True,
         ),
+        gr.outputs.JSON(
+            label="JSON Output"
+        )
     ],
     examples=error_sentences,
     title="文本纠错（Corrector）",