momo committed on
Commit
d3830cc
•
1 Parent(s): 7438a13
Files changed (1) hide show
  1. app.py +122 -0
app.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ python app.py
3
+ """
4
+ import torch
5
+ from transformers import AutoTokenizer, BertForSequenceClassification
6
+ from transformers import TextClassificationPipeline
7
+ import gradio as gr
8
+
9
# Checkpoint that is actually loaded and served by this app.
model_name = 'momo/KcELECTRA-base_Hate_speech_Privacy_Detection'

# Choices shown in the "Model Name" dropdown of the UI.
model_name_list = [
    'momo/KcELECTRA-base_Hate_speech_Privacy_Detection',
    "momo/KcBERT-base_Hate_speech_Privacy_Detection",
]

# Output classes, in the index order used for id2label / label2id below:
# ten hate-speech categories followed by five personal-information categories.
unsmile_labels = [
    "여성/가족", "남성", "성소수자", "인종/국적", "연령", "지역", "종교",
    "기타 혐오", "악플/욕설", "clean",
    'name', 'number', 'address', 'bank', 'person',
]
num_labels = len(unsmile_labels)

# Pipeline device index: first GPU when CUDA is usable, otherwise -1 (CPU),
# so the script also starts on machines without a GPU.
device = 0 if torch.cuda.is_available() else -1

# NOTE(review): the checkpoint name says "ELECTRA" but it is loaded through the
# BERT class — confirm the checkpoint's architecture matches this class.
model = BertForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
    problem_type="multi_label_classification",
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Move the weights to the GPU once, and only when one is present.
if device >= 0:
    model.cuda()

# Attach human-readable class names so the pipeline reports them as labels.
model.config.id2label = dict(enumerate(unsmile_labels))
model.config.label2id = {label: i for i, label in enumerate(unsmile_labels)}

# Multi-label setup: sigmoid scores each class independently, and
# return_all_scores=True asks for a score for every class, not just the top one.
pipe = TextClassificationPipeline(
    model=model,
    tokenizer=tokenizer,
    device=device,
    return_all_scores=True,
    function_to_apply='sigmoid',
)
38
+
39
def dectection(input, text=None):
    """Score a piece of text against every label of the module-level ``pipe``.

    The Gradio interface in this file declares two inputs (a model-name
    dropdown followed by a text box), so it calls this function with two
    positional arguments. A direct single-argument call is still accepted.

    Args:
        input: The text to classify when called with one argument. When
            called with two arguments this is the dropdown's selected model
            name, which is ignored because only one pipeline is loaded.
        text: The text to classify when called with two arguments.

    Returns:
        A dict mapping each label name to its score as a float, which is the
        form a Gradio ``'label'`` output displays as per-class confidences.
    """
    if text is None:
        # Single-argument call: the only argument is the text itself.
        text = input

    # pipe(...) yields one result per input text; [0] selects the result for
    # our single text. With return_all_scores=True that result is expected to
    # be a list of {"label": ..., "score": ...} dicts, one per class.
    per_label = pipe(text)[0]
    return {item["label"]: float(item["score"]) for item in per_label}
42
+
43
# Build the Gradio UI around dectection() and start serving it.
# Inputs: a model-name dropdown and a free-text box; output: a label widget.
app = gr.Interface(
    fn=dectection,
    inputs=[gr.inputs.Dropdown(model_name_list, label="Model Name"), 'text'],
    outputs=['label'],
    title="한국어 혐오표현, 개인정보 판별기 (Korean Hate Speech and Privacy Detection)",
    description="Korean Hate Speech and Privacy Detection."
)
app.launch(inline=False)
51
+
52
+
53
+
54
+
55
+
56
+ # # global var
57
+ # MODEL_NAME = 'jason9693/SoongsilBERT-base-beep'
58
+ # tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
59
+ # model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
60
+ # config = AutoConfig.from_pretrained(MODEL_NAME)
61
+
62
+ # MODEL_BUF = {
63
+ # "name": MODEL_NAME,
64
+ # "tokenizer": tokenizer,
65
+ # "model": model,
66
+ # "config": config
67
+ # }
68
+
69
+ # def change_model_name(name):
70
+ # MODEL_BUF["name"] = name
71
+ # MODEL_BUF["tokenizer"] = AutoTokenizer.from_pretrained(name)
72
+ # MODEL_BUF["model"] = AutoModelForSequenceClassification.from_pretrained(name)
73
+ # MODEL_BUF["config"] = AutoConfig.from_pretrained(name)
74
+
75
+
76
+ # def predict(model_name, text):
77
+ # if model_name != MODEL_BUF["name"]:
78
+ # change_model_name(model_name)
79
+
80
+ # tokenizer = MODEL_BUF["tokenizer"]
81
+ # model = MODEL_BUF["model"]
82
+ # config = MODEL_BUF["config"]
83
+
84
+ # tokenized_text = tokenizer([text], return_tensors='pt')
85
+
86
+ # input_tokens = tokenizer.convert_ids_to_tokens(tokenized_text.input_ids[0])
87
+ # try:
88
+ # input_tokens = util.bytetokens_to_unicdode(input_tokens) if config.model_type in ['roberta', 'gpt', 'gpt2'] else input_tokens
89
+ # except KeyError:
90
+ # input_tokens = input_tokens
91
+
92
+ # model.eval()
93
+ # output, attention = model(**tokenized_text, output_attentions=True, return_dict=False)
94
+ # output = F.softmax(output, dim=-1)
95
+ # result = {}
96
+
97
+ # for idx, label in enumerate(output[0].detach().numpy()):
98
+ # result[config.id2label[idx]] = float(label)
99
+
100
+ # fig = visualize_attention(input_tokens, attention[0][0].detach().numpy())
101
+ # return result, fig#.logits.detach()#.numpy()#, output.attentions.detach().numpy()
102
+
103
+
104
+ # if __name__ == '__main__':
105
+ # text = '์ฟ๋”ด๊ฑธ ํ™๋ณฟ๊ธ€ ์ฟ๋ž‰๊ณญ ์Œ‘์ ฉ๋‚„๊ณ  ์•‰์•Ÿ์žˆ๋ƒฉ'
106
+
107
+ # model_name_list = [
108
+ # 'jason9693/SoongsilBERT-base-beep',
109
+ # "beomi/beep-klue-roberta-base-hate",
110
+ # "beomi/beep-koelectra-base-v3-discriminator-hate",
111
+ # "beomi/beep-KcELECTRA-base-hate"
112
+ # ]
113
+
114
+ # #Create a gradio app with a button that calls predict()
115
+ # app = gr.Interface(
116
+ # fn=predict,
117
+ # inputs=[gr.inputs.Dropdown(model_name_list, label="Model Name"), 'text'], outputs=['label', 'plot'],
118
+ # examples = [[MODEL_BUF["name"], text], [MODEL_BUF["name"], "4=🦀 4≠🦀"]],
119
+ # title="한국어 혐오성 발화 분류기 (Korean Hate Speech Classifier)",
120
+ # description="Korean Hate Speech Classifier with Several Pretrained LM\nCurrent Supported Model:\n1. SoongsilBERT\n2. KcBERT(+KLUE)\n3. KcELECTRA\n4.KoELECTRA."
121
+ # )
122
+ # app.launch(inline=False)