|
|
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
import torch |
|
import numpy as np |
|
import gradio as gr |
|
import os |
|
# Hugging Face access token for the gated/private classifier weights,
# read from the environment (None when the variable is unset).
SECRET_TOKEN = os.getenv("TOKEN_OFFENSIVENESS_ESTIMATION")

# Tokenizer comes from the public base LUKE Japanese model; the fine-tuned
# classifier below reuses it rather than shipping its own tokenizer files.
tokenizer = AutoTokenizer.from_pretrained("studio-ousia/luke-japanese-base-lite")

# Offensiveness-estimation head with custom modeling code (trust_remote_code).
# NOTE(review): `use_auth_token` is deprecated in recent transformers releases
# in favor of `token` — confirm the pinned transformers version before changing.
model = AutoModelForSequenceClassification.from_pretrained("TomokiFujihara/luke-japanese-base-lite-offensiveness-estimation", trust_remote_code=True, use_auth_token=SECRET_TOKEN)
|
|
|
|
|
def generate(text): |
|
|
|
inputs = tokenizer.encode_plus(text, return_tensors='pt') |
|
|
|
outputs = model(inputs['input_ids'], inputs['attention_mask']).detach().numpy()[0][:3] |
|
|
|
minimum = np.min(outputs) |
|
if minimum < 0: |
|
outputs = outputs - minimum |
|
score = outputs / np.sum(outputs) |
|
prediction = f'攻撃的でない発言: {score[0]:.1%},\nグレーゾーンの発言: {score[1]:.1%},\n攻撃的な発言: {score[2]:.1%}' |
|
|
|
return prediction |
|
|
|
|
|
# Gradio UI wiring: a single textbox in, plain-text score report out.
iface = gr.Interface(

    generate,

    # Default value is a Japanese placeholder prompt shown in the textbox.
    inputs = gr.Textbox(label = "Input a text", value = "攻撃性を評価したいコメントを入力してください."),

    outputs="text",

    # Title (Japanese): "Offensiveness estimation for Japanese SNS comments".
    title = "日本語のSNSコメントの攻撃性推定")


# share=True exposes a temporary public tunnel URL in addition to the
# local server; blocks until the server is stopped.
iface.launch(share=True)