File size: 2,696 Bytes
d3830cc
 
 
056dc2d
d3830cc
 
 
e542c59
 
 
 
 
 
 
 
98186a7
e542c59
 
 
 
 
aea35b3
e542c59
 
 
 
faf7c96
e542c59
 
 
98186a7
e542c59
 
 
19a55cd
e542c59
 
 
 
 
 
 
 
c21f82a
e542c59
 
e91e4da
5332efa
e91e4da
bb7d89b
e91e4da
e542c59
 
19a55cd
 
 
e542c59
 
 
 
 
 
 
 
 
813b46e
 
19a55cd
 
 
 
 
e542c59
19a55cd
e542c59
19a55cd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
"""
python interactive.py
"""
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import TextClassificationPipeline
import gradio as gr

# global var
MODEL_NAME = 'momo/KcBERT-base_Hate_speech_Privacy_Detection'
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels= 15,
    problem_type="multi_label_classification"
)

MODEL_BUF = {
    "name": MODEL_NAME,
    "tokenizer": tokenizer,
    "model": model,
}

def change_model_name(name):
    MODEL_BUF["name"] = name
    MODEL_BUF["tokenizer"] = AutoTokenizer.from_pretrained(name)
    MODEL_BUF["model"] = AutoModelForSequenceClassification.from_pretrained(name)

def predict(model_name, text):
    if model_name != MODEL_BUF["name"]:
        change_model_name(model_name)
    
    tokenizer = MODEL_BUF["tokenizer"]
    model = MODEL_BUF["model"]

    unsmile_labels = ["์—ฌ์„ฑ/๊ฐ€์กฑ","๋‚จ์„ฑ","์„ฑ์†Œ์ˆ˜์ž","์ธ์ข…/๊ตญ์ ","์—ฐ๋ น","์ง€์—ญ","์ข…๊ต","๊ธฐํƒ€ ํ˜์˜ค","์•…ํ”Œ/์š•์„ค", "clean", '์ด๋ฆ„', '์ „ํ™”๋ฒˆํ˜ธ', '์ฃผ์†Œ', '๊ณ„์ขŒ๋ฒˆํ˜ธ', '์ฃผ๋ฏผ๋ฒˆํ˜ธ']
    num_labels = len(unsmile_labels)

    model.config.id2label = {i: label for i, label in zip(range(num_labels), unsmile_labels)}
    model.config.label2id = {label: i for i, label in zip(range(num_labels), unsmile_labels)}

    pipe = TextClassificationPipeline(
    model = model,
    tokenizer = tokenizer,
    return_all_scores=True,
    function_to_apply='sigmoid'
    )

    for result in pipe(text):
        print(result)

    return result

if __name__ == '__main__':
    exam1 = '๊ฒฝ๊ธฐ๋„ ์„ฑ๋‚จ์‹œ ์ˆ˜์ •๊ตฌ ํƒœํ‰3๋™์€ ์šฐ๋ฆฌ ๋™๋„ค์•ผ!'
    exam2 = '๋‚ด ํ•ธ๋“œํฐ ๋ฒˆํ˜ธ๋Š” 010-3930-8237 ์ด์•ผ!'
    exam3 = '์•„ ์  ์žฅ ๋„ˆ๋ฌด ์งœ์ฆ๋‚œ๋‹ค'

    model_name_list = [
        'momo/KcELECTRA-base_Hate_speech_Privacy_Detection',
        "momo/KcBERT-base_Hate_speech_Privacy_Detection",
    ]

    #Create a gradio app with a button that calls predict()
    app = gr.Interface(
        fn=predict,
        inputs=[gr.inputs.Dropdown(model_name_list, label="Model Name"), 'text'],
        outputs='text', 
        examples = [
            [MODEL_BUF["name"], exam1], 
            [MODEL_BUF["name"], exam2],
            [MODEL_BUF["name"], exam3]
            ],
        title="ํ•œ๊ตญ์–ด ํ˜์˜คํ‘œํ˜„, ๊ฐœ์ธ์ •๋ณด ํŒ๋ณ„๊ธฐ (Korean Hate Speech and Privacy Detection)",
        description="Korean Hate Speech and Privacy Detection. \t 15๊ฐœ label Detection: ์—ฌ์„ฑ/๊ฐ€์กฑ, ๋‚จ์„ฑ, ์„ฑ์†Œ์ˆ˜์ž, ์ธ์ข…/๊ตญ์ , ์—ฐ๋ น, ์ง€์—ญ, ์ข…๊ต, ๊ธฐํƒ€ ํ˜์˜ค, ์•…ํ”Œ/์š•์„ค, clean, name, number, address, bank, person"
        )
    app.launch()