momo committed on
Commit
aea35b3
•
1 Parent(s): 71f8857
Files changed (1) hide show
  1. app.py +82 -58
app.py CHANGED
@@ -6,73 +6,97 @@ from transformers import AutoTokenizer, BertForSequenceClassification, AutoModel
6
  from transformers import TextClassificationPipeline
7
  import gradio as gr
8
 
9
- # global var
10
- MODEL_NAME = 'momo/KcBERT-base_Hate_speech_Privacy_Detection'
11
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
12
  model = AutoModelForSequenceClassification.from_pretrained(
13
- MODEL_NAME,
14
  num_labels= 15,
15
  problem_type="multi_label_classification"
16
  )
17
- # config = AutoConfig.from_pretrained(MODEL_NAME)
18
 
19
- MODEL_BUF = {
20
- "name": MODEL_NAME,
21
- "tokenizer": tokenizer,
22
- "model": model,
23
- # "config": config
24
- }
25
 
26
- def change_model_name(name):
27
- MODEL_BUF["name"] = name
28
- MODEL_BUF["tokenizer"] = AutoTokenizer.from_pretrained(name)
29
- MODEL_BUF["model"] = AutoModelForSequenceClassification.from_pretrained(name)
30
- # MODEL_BUF["config"] = AutoConfig.from_pretrained(name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
 
 
 
 
32
 
33
- def predict(model_name, text):
34
- if model_name != MODEL_BUF["name"]:
35
- change_model_name(model_name)
36
 
37
- tokenizer = MODEL_BUF["tokenizer"]
38
- model = MODEL_BUF["model"]
39
- # config = MODEL_BUF["config"]
40
-
41
- unsmile_labels = ["여성/가족","남성","성소수자","인종/국적","연령","지역","종교","기타 혐오","악플/욕설","clean", 'name', 'number', 'address', 'bank', 'person']
42
- num_labels = len(unsmile_labels)
43
-
44
- model.config.id2label = {i: label for i, label in zip(range(num_labels), unsmile_labels)}
45
- model.config.label2id = {label: i for i, label in zip(range(num_labels), unsmile_labels)}
46
-
47
- pipe = TextClassificationPipeline(
48
- model = model,
49
- tokenizer = tokenizer,
50
- return_all_scores=True,
51
- function_to_apply='sigmoid'
52
- )
53
-
54
- for result in pipe(text)[0]:
55
- output = result
56
-
57
- return output
58
-
59
- if __name__ == '__main__':
60
- text = '읿딴걸 홍볿글 읿랉곭 쌑젩낄고 앉앟있냩'
61
-
62
- model_name_list = [
63
- 'momo/KcELECTRA-base_Hate_speech_Privacy_Detection',
64
- "momo/KcBERT-base_Hate_speech_Privacy_Detection",
65
- ]
66
-
67
- #Create a gradio app with a button that calls predict()
68
- app = gr.Interface(
69
- fn=predict,
70
- inputs=[gr.inputs.Dropdown(model_name_list, label="Model Name"), 'text'], outputs=['label'],
71
- examples = [[MODEL_BUF["name"], text], [MODEL_BUF["name"], "4=🦀 4≠🦀"]],
72
- title="한국어 혐오표현, 개인정보 판별기 (Korean Hate Speech and Privacy Detection)",
73
- description="Korean Hate Speech and Privacy Detection."
74
- )
75
- app.launch()
76
 
77
 
78
  # # global var
 
6
  from transformers import TextClassificationPipeline
7
  import gradio as gr
8
 
9
+ tokenizer = AutoTokenizer.from_pretrained('momo/KcELECTRA-base_Hate_speech_Privacy_Detection')
 
 
10
  model = AutoModelForSequenceClassification.from_pretrained(
11
+ 'momo/KcELECTRA-base_Hate_speech_Privacy_Detection',
12
  num_labels= 15,
13
  problem_type="multi_label_classification"
14
  )
 
15
 
 
 
 
 
 
 
16
 
17
+ pipe = TextClassificationPipeline(
18
+ model = model,
19
+ tokenizer = tokenizer,
20
+ return_all_scores=True,
21
+ function_to_apply='sigmoid'
22
+ )
23
+
24
+ def predict(text):
25
+ return pipe(text)[0]["translation_text"]
26
+
27
+ iface = gr.Interface(
28
+ fn=predict,
29
+ inputs='text',
30
+ outputs='text',
31
+ examples=[["Hello! My name is Omar"]]
32
+ )
33
+
34
+ iface.launch()
35
+
36
+
37
+
38
+ # # global var
39
+ # MODEL_NAME = 'momo/KcBERT-base_Hate_speech_Privacy_Detection'
40
+ # tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
41
+ # model = AutoModelForSequenceClassification.from_pretrained(
42
+ # MODEL_NAME,
43
+ # num_labels= 15,
44
+ # problem_type="multi_label_classification"
45
+ # )
46
+
47
+ # MODEL_BUF = {
48
+ # "name": MODEL_NAME,
49
+ # "tokenizer": tokenizer,
50
+ # "model": model,
51
+ # }
52
 
53
+ # def change_model_name(name):
54
+ # MODEL_BUF["name"] = name
55
+ # MODEL_BUF["tokenizer"] = AutoTokenizer.from_pretrained(name)
56
+ # MODEL_BUF["model"] = AutoModelForSequenceClassification.from_pretrained(name)
57
 
58
+ # def predict(model_name, text):
59
+ # if model_name != MODEL_BUF["name"]:
60
+ # change_model_name(model_name)
61
 
62
+ # tokenizer = MODEL_BUF["tokenizer"]
63
+ # model = MODEL_BUF["model"]
64
+
65
+ # unsmile_labels = ["여성/가족","남성","성소수자","인종/국적","연령","지역","종교","기타 혐오","악플/욕설","clean", 'name', 'number', 'address', 'bank', 'person']
66
+ # num_labels = len(unsmile_labels)
67
+
68
+ # model.config.id2label = {i: label for i, label in zip(range(num_labels), unsmile_labels)}
69
+ # model.config.label2id = {label: i for i, label in zip(range(num_labels), unsmile_labels)}
70
+
71
+ # pipe = TextClassificationPipeline(
72
+ # model = model,
73
+ # tokenizer = tokenizer,
74
+ # return_all_scores=True,
75
+ # function_to_apply='sigmoid'
76
+ # )
77
+
78
+ # for result in pipe(text)[0]:
79
+ # output = result
80
+
81
+ # return output
82
+
83
+ # if __name__ == '__main__':
84
+ # text = '읿딴걸 홍볿글 읿랉곭 쌑젩낄고 앉앟있냩'
85
+
86
+ # model_name_list = [
87
+ # 'momo/KcELECTRA-base_Hate_speech_Privacy_Detection',
88
+ # "momo/KcBERT-base_Hate_speech_Privacy_Detection",
89
+ # ]
90
+
91
+ # #Create a gradio app with a button that calls predict()
92
+ # app = gr.Interface(
93
+ # fn=predict,
94
+ # inputs=[gr.inputs.Dropdown(model_name_list, label="Model Name"), 'text'], outputs=['label'],
95
+ # examples = [[MODEL_BUF["name"], text], [MODEL_BUF["name"], "4=🦀 4≠🦀"]],
96
+ # title="한국어 혐오표현, 개인정보 판별기 (Korean Hate Speech and Privacy Detection)",
97
+ # description="Korean Hate Speech and Privacy Detection."
98
+ # )
99
+ # app.launch()
 
100
 
101
 
102
  # # global var