momo committed on
Commit
19a55cd
•
1 Parent(s): a590e48
Files changed (1)
  1. app.py +11 -74
app.py CHANGED
@@ -33,7 +33,7 @@ def predict(model_name, text):
     tokenizer = MODEL_BUF["tokenizer"]
     model = MODEL_BUF["model"]
 
-    unsmile_labels = ["여성/가족","남성","성소수자","인종/국적","연령","지역","종교","기타 혐오","악플/욕설","clean", 'name', 'number', 'address', 'bank', 'person']
+    unsmile_labels = ["여성/가족","남성","성소수자","인종/국적","연령","지역","종교","기타 혐오","악플/욕설", "clean", '이름', '전화번호', '주소', '계좌번호', '주민번호']
     num_labels = len(unsmile_labels)
 
     model.config.id2label = {i: label for i, label in zip(range(num_labels), unsmile_labels)}
@@ -49,7 +49,9 @@ def predict(model_name, text):
     return pipe(text)[0]
 
 if __name__ == '__main__':
-    text = '읿딴걸 홍볿글 읿랉곭 쌑젩낄고 앉앟있냩'
+    exam1 = '경기도 성남시 수정구 태평3동은 우리 동네야!'
+    exam2 = '내 핸드폰 번호는 010-3930-8237 이야!'
+    exam3 = '아 젠장 너무 짜증난다'
 
     model_name_list = [
         'momo/KcELECTRA-base_Hate_speech_Privacy_Detection',
@@ -60,77 +62,12 @@ if __name__ == '__main__':
     app = gr.Interface(
         fn=predict,
         inputs=[gr.inputs.Dropdown(model_name_list, label="Model Name"), 'text'], outputs='text',
-        examples = [[MODEL_BUF["name"], text], [MODEL_BUF["name"], "4=🦀 4≠🦀"]],
+        examples = [
+            [MODEL_BUF["name"], exam1],
+            [MODEL_BUF["name"], exam2],
+            [MODEL_BUF["name"], exam3]
+        ],
         title="한국어 혐오표현, 개인정보 판별기 (Korean Hate Speech and Privacy Detection)",
-        description="Korean Hate Speech and Privacy Detection."
+        description="Korean Hate Speech and Privacy Detection. \t 15개 label Detection: 여성/가족, 남성, 성소수자, 인종/국적, 연령, 지역, 종교, 기타 혐오, 악플/욕설, clean, name, number, address, bank, person"
     )
-    app.launch()
-
-
-# # global var
-# MODEL_NAME = 'jason9693/SoongsilBERT-base-beep'
-# tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-# model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
-# config = AutoConfig.from_pretrained(MODEL_NAME)
-
-# MODEL_BUF = {
-# "name": MODEL_NAME,
-# "tokenizer": tokenizer,
-# "model": model,
-# "config": config
-# }
-
-# def change_model_name(name):
-# MODEL_BUF["name"] = name
-# MODEL_BUF["tokenizer"] = AutoTokenizer.from_pretrained(name)
-# MODEL_BUF["model"] = AutoModelForSequenceClassification.from_pretrained(name)
-# MODEL_BUF["config"] = AutoConfig.from_pretrained(name)
-
-
-# def predict(model_name, text):
-# if model_name != MODEL_BUF["name"]:
-# change_model_name(model_name)
-
-# tokenizer = MODEL_BUF["tokenizer"]
-# model = MODEL_BUF["model"]
-# config = MODEL_BUF["config"]
-
-# tokenized_text = tokenizer([text], return_tensors='pt')
-
-# input_tokens = tokenizer.convert_ids_to_tokens(tokenized_text.input_ids[0])
-# try:
-# input_tokens = util.bytetokens_to_unicdode(input_tokens) if config.model_type in ['roberta', 'gpt', 'gpt2'] else input_tokens
-# except KeyError:
-# input_tokens = input_tokens
-
-# model.eval()
-# output, attention = model(**tokenized_text, output_attentions=True, return_dict=False)
-# output = F.softmax(output, dim=-1)
-# result = {}
-
-# for idx, label in enumerate(output[0].detach().numpy()):
-# result[config.id2label[idx]] = float(label)
-
-# fig = visualize_attention(input_tokens, attention[0][0].detach().numpy())
-# return result, fig#.logits.detach()#.numpy()#, output.attentions.detach().numpy()
-
-
-# if __name__ == '__main__':
-# text = '읿딴걸 홍볿글 읿랉곭 쌑젩낄고 앉앟있냩'
-
-# model_name_list = [
-# 'jason9693/SoongsilBERT-base-beep',
-# "beomi/beep-klue-roberta-base-hate",
-# "beomi/beep-koelectra-base-v3-discriminator-hate",
-# "beomi/beep-KcELECTRA-base-hate"
-# ]
-
-# #Create a gradio app with a button that calls predict()
-# app = gr.Interface(
-# fn=predict,
-# inputs=[gr.inputs.Dropdown(model_name_list, label="Model Name"), 'text'], outputs=['label', 'plot'],
-# examples = [[MODEL_BUF["name"], text], [MODEL_BUF["name"], "4=🦀 4≠🦀"]],
-# title="한국어 혐오성 발화 분류기 (Korean Hate Speech Classifier)",
-# description="Korean Hate Speech Classifier with Several Pretrained LM\nCurrent Supported Model:\n1. SoongsilBERT\n2. KcBERT(+KLUE)\n3. KcELECTRA\n4.KoELECTRA."
-# )
-# app.launch(inline=False)
+    app.launch()
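
For reference, the relabeling logic this commit touches can be exercised outside the Gradio app. The sketch below is not part of the commit: the checkpoint name, label list, id2label remapping, and the exam2 sentence are taken from the diff above, while the TextClassificationPipeline construction with return_all_scores=True is an assumption about the parts of app.py that the diff does not show.

from transformers import AutoModelForSequenceClassification, AutoTokenizer, TextClassificationPipeline

MODEL_NAME = 'momo/KcELECTRA-base_Hate_speech_Privacy_Detection'

# 15 labels from this commit: 9 hate-speech categories, "clean", and 5 privacy categories.
unsmile_labels = ["여성/가족", "남성", "성소수자", "인종/국적", "연령", "지역", "종교",
                  "기타 혐오", "악플/욕설", "clean", '이름', '전화번호', '주소', '계좌번호', '주민번호']

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

# Mirror predict(): overwrite the checkpoint's generic id2label entries with readable names.
num_labels = len(unsmile_labels)
model.config.id2label = {i: label for i, label in zip(range(num_labels), unsmile_labels)}

# Assumed pipeline setup; the actual pipe() construction in app.py is outside this diff.
pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, return_all_scores=True)

print(pipe('내 핸드폰 번호는 010-3930-8237 이야!')[0])  # exam2 from the diff

With return_all_scores=True, pipe(text)[0] yields one {'label', 'score'} entry per label, which matches the structure that predict() returns to the Gradio 'text' output.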