shibing624 committed on
Commit
55292dd
1 Parent(s): 2f789a3
Files changed (2) hide show
  1. app.py +57 -4
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,7 +1,60 @@
1
  import gradio as gr
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
 
 
5
 
6
- iface = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import operator
3
+ import torch
4
+ from transformers import BertTokenizer, BertForMaskedLM
5
 
6
# Hub id used for both the tokenizer and the masked-LM weights.
MODEL_NAME = "shibing624/macbert4csc-base-chinese"

# Prefer CUDA when torch reports it as available; fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
# Load the weights and move them to the chosen device in one step.
model = BertForMaskedLM.from_pretrained(MODEL_NAME).to(device)
10
 
11
+
12
# Characters copied back verbatim from the original text before comparing.
# The original author labelled these "unk word"; presumably the decoded model
# output does not reproduce them -- TODO confirm against the model vocabulary.
_UNK_CHARS = frozenset([' ', '“', '”', '‘', '’', '琊', '\n', '…', '—', '擤'])


def _get_errors(corrected_text, origin_text):
    """Align the model output with the input and list the changed characters.

    Args:
        corrected_text: Decoded model output, already cut to the input length.
        origin_text: The text that was submitted for correction.

    Returns:
        A ``(corrected_text, details)`` tuple. ``details`` holds one
        ``(original_char, corrected_char, start, end)`` entry per position
        where the two texts differ, in increasing ``start`` order.
    """
    details = []
    for i, ori_char in enumerate(origin_text):
        if ori_char in _UNK_CHARS:
            # Re-insert the character so later indices stay aligned.
            corrected_text = corrected_text[:i] + ori_char + corrected_text[i:]
            continue
        if i >= len(corrected_text):
            # Nothing left to compare; keep looping so that trailing
            # characters from _UNK_CHARS are still appended above.
            continue
        new_char = corrected_text[i]
        if ori_char == new_char:
            continue
        if ori_char.lower() == new_char:
            # Only the letter case differs: restore the original character
            # and do not report it as an error.
            corrected_text = corrected_text[:i] + ori_char + corrected_text[i + 1:]
            continue
        details.append((ori_char, new_char, i, i + 1))
    # Entries are appended in increasing index order, so no sort is needed.
    return corrected_text, details


def ai_text(texts):
    """Correct spelling errors in one sentence or in a batch of sentences.

    Args:
        texts: A single string, or an iterable of strings.

    Returns:
        A list with one ``(corrected_text, details)`` tuple per input
        sentence; ``details`` is described in ``_get_errors``. An empty batch
        yields an empty list without calling the model.
    """
    # A bare string must be wrapped in a list: zip() below would otherwise
    # pair the single row of logits with only the first character of the text.
    texts = [texts] if isinstance(texts, str) else list(texts)
    if not texts:
        return []

    with torch.no_grad():
        encoded = tokenizer(texts, padding=True, return_tensors='pt').to(device)
        outputs = model(**encoded)

    result = []
    for ids, text in zip(outputs.logits, texts):
        # Take the highest-scoring token at each position, decode it, and
        # strip the spaces from the decoded string.
        decoded = tokenizer.decode(torch.argmax(ids, dim=-1), skip_special_tokens=True).replace(' ', '')
        # Cut the output to the input length before aligning.
        corrected_text, details = _get_errors(decoded[:len(text)], text)
        print(text, ' => ', corrected_text, details)
        result.append((corrected_text, details))
    print(result)
    return result
43
+
44
+
45
# Sample inputs shown in the demo. The interface built from this list has a
# single text input, so every row holds exactly one string.
examples = [
    ['真麻烦你了。希望你们好好的跳无'],
    ['少先队员因该为老人让坐'],
    ['机七学习是人工智能领遇最能体现智能的一个分知'],
    ['今天心情很好'],
    ['老是较书。'],
    ['遇到一位很棒的奴生跟我聊天。'],
    ['他的语说的很好,法语也不错'],
    ['他法语说的很好,的语也不错'],
    ['他们的吵翻很不错,再说他们做的咖喱鸡也好吃'],
    ['不过在许多传统国家,女人向未得到平等'],
]
57
+
58
# Wire the correction function into a Gradio interface: one text input, one
# text output, with the sample sentences offered as examples.
output_text = gr.outputs.Textbox()
iface = gr.Interface(
    fn=ai_text,
    inputs="textbox",
    outputs=output_text,
    title="Chinese Text Correction shibing624/macbert4csc-base-chinese",
    description="Copy or input error Chinese text. Submit and the machine will correct text.",
    examples=examples,
)
iface.launch()
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ transformers