momo's picture
add app
dcc129c
raw
history blame
5.7 kB
"""
python interactive.py
"""
import torch
from transformers import AutoTokenizer, BertForSequenceClassification, AutoModelForSequenceClassification, AutoConfig
from transformers import TextClassificationPipeline
import gradio as gr
# Hugging Face Hub id used for both the tokenizer and the classifier weights.
MODEL_NAME = 'momo/KcELECTRA-base_Hate_speech_Privacy_Detection'

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Load the checkpoint configured as a 15-label, multi-label classifier.
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=15,
    problem_type="multi_label_classification",
)

# Inference pipeline: request a score for every label and apply a sigmoid
# to the model outputs. `predict` indexes into the result of this pipeline,
# so the options here and the indexing there must stay in sync.
pipe = TextClassificationPipeline(
    model=model,
    tokenizer=tokenizer,
    return_all_scores=True,
    function_to_apply='sigmoid',
)
def predict(text):
    """Classify ``text`` with the module-level pipeline ``pipe``.

    This is the callback wired into the Gradio interface, so ``text`` is
    whatever the user typed into the text input.

    Args:
        text: The input string to classify.

    Returns:
        The first element of the pipeline's result for ``text``. With the
        pipeline options used in this file (all scores returned, sigmoid
        applied) this is expected to be the list of per-label score
        records for the single input -- NOTE(review): confirm the result
        nesting against the installed transformers version.
    """
    return pipe(text)[0]
# Sample input offered in the UI.
example_inputs = [["Hello! My name is Omar"]]

# Text-in / text-out Gradio interface around `predict`.
iface = gr.Interface(
    fn=predict,
    inputs='text',
    outputs='text',
    examples=example_inputs,
)

# Start serving the interface.
iface.launch()
# # global var
# MODEL_NAME = 'momo/KcBERT-base_Hate_speech_Privacy_Detection'
# tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# model = AutoModelForSequenceClassification.from_pretrained(
# MODEL_NAME,
# num_labels= 15,
# problem_type="multi_label_classification"
# )
# MODEL_BUF = {
# "name": MODEL_NAME,
# "tokenizer": tokenizer,
# "model": model,
# }
# def change_model_name(name):
# MODEL_BUF["name"] = name
# MODEL_BUF["tokenizer"] = AutoTokenizer.from_pretrained(name)
# MODEL_BUF["model"] = AutoModelForSequenceClassification.from_pretrained(name)
# def predict(model_name, text):
# if model_name != MODEL_BUF["name"]:
# change_model_name(model_name)
# tokenizer = MODEL_BUF["tokenizer"]
# model = MODEL_BUF["model"]
# unsmile_labels = ["여성/가족","남성","성소수자","인종/국적","연령","지역","종교","기타 혐오","악플/욕설","clean", 'name', 'number', 'address', 'bank', 'person']
# num_labels = len(unsmile_labels)
# model.config.id2label = {i: label for i, label in zip(range(num_labels), unsmile_labels)}
# model.config.label2id = {label: i for i, label in zip(range(num_labels), unsmile_labels)}
# pipe = TextClassificationPipeline(
# model = model,
# tokenizer = tokenizer,
# return_all_scores=True,
# function_to_apply='sigmoid'
# )
# for result in pipe(text)[0]:
# output = result
# return output
# if __name__ == '__main__':
# text = '읿딴걸 홍볿글 읿랉곭 쌑젩낄고 앉앟있냩'
# model_name_list = [
# 'momo/KcELECTRA-base_Hate_speech_Privacy_Detection',
# "momo/KcBERT-base_Hate_speech_Privacy_Detection",
# ]
# #Create a gradio app with a button that calls predict()
# app = gr.Interface(
# fn=predict,
# inputs=[gr.inputs.Dropdown(model_name_list, label="Model Name"), 'text'], outputs=['label'],
# examples = [[MODEL_BUF["name"], text], [MODEL_BUF["name"], "4=🦀 4≠🦀"]],
# title="한국어 혐오표현, 개인정보 판별기 (Korean Hate Speech and Privacy Detection)",
# description="Korean Hate Speech and Privacy Detection."
# )
# app.launch()
# # global var
# MODEL_NAME = 'jason9693/SoongsilBERT-base-beep'
# tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
# config = AutoConfig.from_pretrained(MODEL_NAME)
# MODEL_BUF = {
# "name": MODEL_NAME,
# "tokenizer": tokenizer,
# "model": model,
# "config": config
# }
# def change_model_name(name):
# MODEL_BUF["name"] = name
# MODEL_BUF["tokenizer"] = AutoTokenizer.from_pretrained(name)
# MODEL_BUF["model"] = AutoModelForSequenceClassification.from_pretrained(name)
# MODEL_BUF["config"] = AutoConfig.from_pretrained(name)
# def predict(model_name, text):
# if model_name != MODEL_BUF["name"]:
# change_model_name(model_name)
# tokenizer = MODEL_BUF["tokenizer"]
# model = MODEL_BUF["model"]
# config = MODEL_BUF["config"]
# tokenized_text = tokenizer([text], return_tensors='pt')
# input_tokens = tokenizer.convert_ids_to_tokens(tokenized_text.input_ids[0])
# try:
# input_tokens = util.bytetokens_to_unicdode(input_tokens) if config.model_type in ['roberta', 'gpt', 'gpt2'] else input_tokens
# except KeyError:
# input_tokens = input_tokens
# model.eval()
# output, attention = model(**tokenized_text, output_attentions=True, return_dict=False)
# output = F.softmax(output, dim=-1)
# result = {}
# for idx, label in enumerate(output[0].detach().numpy()):
# result[config.id2label[idx]] = float(label)
# fig = visualize_attention(input_tokens, attention[0][0].detach().numpy())
# return result, fig#.logits.detach()#.numpy()#, output.attentions.detach().numpy()
# if __name__ == '__main__':
# text = '읿딴걸 홍볿글 읿랉곭 쌑젩낄고 앉앟있냩'
# model_name_list = [
# 'jason9693/SoongsilBERT-base-beep',
# "beomi/beep-klue-roberta-base-hate",
# "beomi/beep-koelectra-base-v3-discriminator-hate",
# "beomi/beep-KcELECTRA-base-hate"
# ]
# #Create a gradio app with a button that calls predict()
# app = gr.Interface(
# fn=predict,
# inputs=[gr.inputs.Dropdown(model_name_list, label="Model Name"), 'text'], outputs=['label', 'plot'],
# examples = [[MODEL_BUF["name"], text], [MODEL_BUF["name"], "4=🦀 4≠🦀"]],
# title="한국어 혐오성 발화 분류기 (Korean Hate Speech Classifier)",
# description="Korean Hate Speech Classifier with Several Pretrained LM\nCurrent Supported Model:\n1. SoongsilBERT\n2. KcBERT(+KLUE)\n3. KcELECTRA\n4.KoELECTRA."
# )
# app.launch(inline=False)