# young476's picture
# Update app.py
# bd7d325 verified
import torch
import re
import json
import gradio as gr
from konlpy.tag import Okt
from transformers import AutoTokenizer, BertForSequenceClassification
# --- 1. Settings and preprocessing helpers ---

# Locations handed to from_pretrained() for the shared tokenizer and the two
# fine-tuned classifiers.
BASE_TOKENIZER_DIR = "base"
EMOTION_MODEL_DIR = "kobert_emotion_classifier"
GENRE_MODEL_DIR = "kobert_genre_classifier_archive"

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Single shared konlpy Okt tagger instance, used by extract_pos().
okt = Okt()
def remove_english(text):
    """Return *text* with every run of ASCII letters (A-Z, a-z) removed.

    Digits, whitespace, punctuation and non-ASCII characters are kept as is.
    """
    ascii_letter_runs = re.compile(r'[A-Za-z]+')
    return ascii_letter_runs.sub('', text)
def extract_pos(text):
    """Reduce *text* to its nouns, verbs and adjectives.

    ASCII letters are stripped first with remove_english(); the remainder is
    tagged by the module-level ``okt`` tagger, and only words tagged 'Noun',
    'Verb' or 'Adjective' are kept.

    Returns:
        The kept words, in their original order, joined by single spaces.
    """
    without_ascii_letters = remove_english(text)
    kept_words = []
    for word, tag in okt.pos(without_ascii_letters):
        if tag in ('Noun', 'Verb', 'Adjective'):
            kept_words.append(word)
    return ' '.join(kept_words)
# --- 2. Load the shared tokenizer and both classifiers ---
# NOTE(review): the Korean string literals in this section look mis-encoded
# (mojibake) in this copy of the file -- confirm the file's real encoding.

# Genre class ids are defined directly in code rather than read from a file.
# The table is static, so it lives outside the try block and is always bound.
id_to_genre_label = {
    0: '๋ก/๋ฉ”ํƒˆ',
    1: '๋Œ„์Šค',
    2: 'R&B/Soul',
    3: '๋ฐœ๋ผ๋“œ',
    4: '๋žฉ/ํž™ํ•ฉ',
    5: 'ํŠธ๋กœํŠธ'
}

# Pre-bind every name the loading step produces, so that a failure part-way
# through never leaves some of them undefined for the rest of the module.
tokenizer = None
emotion_model = None
genre_model = None
id_to_emotion_label = {}

try:
    # NOTE(security): trust_remote_code=True lets the tokenizer loader run
    # Python code shipped with BASE_TOKENIZER_DIR; keep that source trusted.
    tokenizer = AutoTokenizer.from_pretrained(BASE_TOKENIZER_DIR, trust_remote_code=True)
    print("โœ… ๊ณต์šฉ ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ ์„ฑ๊ณต")

    # Emotion classifier; its labels are read from labels_ids.json.
    emotion_model = BertForSequenceClassification.from_pretrained(EMOTION_MODEL_DIR)
    emotion_model.to(device)
    emotion_model.eval()
    with open(f"{EMOTION_MODEL_DIR}/labels_ids.json", "r", encoding="utf-8") as f:
        emotion_labels_ids = json.load(f)
    # Invert the loaded mapping so it can be looked up by class index.
    id_to_emotion_label = {v: k for k, v in emotion_labels_ids.items()}
    print("โœ… ๊ฐ์ • ๋ถ„๋ฅ˜ ๋ชจ๋ธ ๋กœ๋“œ ์„ฑ๊ณต")

    # Genre classifier (labels come from id_to_genre_label above).
    genre_model = BertForSequenceClassification.from_pretrained(GENRE_MODEL_DIR)
    genre_model.to(device)
    genre_model.eval()
    print("โœ… ์žฅ๋ฅด ๋ถ„๋ฅ˜ ๋ชจ๋ธ ๋กœ๋“œ ์„ฑ๊ณต (๋ ˆ์ด๋ธ” ์ง์ ‘ ์ •์˜)")
except Exception as e:
    # Deliberately broad: any loading failure is printed and both models are
    # reset to None, which predict_emotion_and_genre() checks before running.
    print(f"๋ชจ๋ธ ๋˜๋Š” ํ† ํฌ๋‚˜์ด์ € ๋กœ๋”ฉ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
    emotion_model, genre_model = None, None
# --- 3. Combined prediction function ---
def _predict_label_confidences(model, text, id_to_label, max_length):
    """Classify *text* with *model* and return ``{label: probability}``.

    Args:
        model: Sequence-classification model whose output exposes ``.logits``.
        text: Already preprocessed input string.
        id_to_label: Mapping from class index to display label.
        max_length: Truncation length handed to the shared tokenizer.

    Returns:
        Dict mapping every label to its softmax probability as a float.
    """
    inputs = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        padding=True,
        max_length=max_length,
    ).to(device)
    with torch.no_grad():
        logits = model(**inputs).logits
    # Exactly one text is tokenized per call, so row 0 is the whole batch.
    # Indexing (instead of a bare squeeze()) keeps the class axis intact even
    # if a model has a single output class.
    probs = torch.softmax(logits, dim=1)[0].cpu().numpy()
    return {id_to_label[i]: float(prob) for i, prob in enumerate(probs)}


def predict_emotion_and_genre(text):
    """Predict emotion and genre confidences for the given lyrics.

    The text is reduced by extract_pos() and then fed to both classifiers.

    Args:
        text: Raw lyrics as entered by the user.

    Returns:
        A tuple ``(emotion_confidences, genre_confidences)``; each element is
        a dict mapping a label to its softmax probability.

    Raises:
        gr.Error: If either model failed to load at start-up.
    """
    if emotion_model is None or genre_model is None:
        raise gr.Error("๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. Space์˜ ๋กœ๊ทธ๋ฅผ ํ™•์ธํ•ด์ฃผ์„ธ์š”.")

    preprocessed_text = extract_pos(text)

    # NOTE(review): the two classifiers use different max_length values
    # (384 vs 512); presumably each matches its training setup -- confirm.
    emotion_confidences = _predict_label_confidences(
        emotion_model, preprocessed_text, id_to_emotion_label, max_length=384
    )
    genre_confidences = _predict_label_confidences(
        genre_model, preprocessed_text, id_to_genre_label, max_length=512
    )
    return emotion_confidences, genre_confidences
# --- 4. Gradio interface ---
# NOTE(review): the Korean UI strings below look mis-encoded (mojibake) in
# this copy of the file -- confirm the file's real encoding.
title = "๐ŸŽค ํ•œ๊ตญ์–ด ๊ฐ€์‚ฌ ๊ฐ์ • ๋ฐ ์žฅ๋ฅด ๋™์‹œ ๋ถ„์„๊ธฐ ๐ŸŽถ"
description = "KoBERT๋ฅผ ํŒŒ์ธํŠœ๋‹ํ•˜์—ฌ ๋งŒ๋“  ๋ชจ๋ธ์ž…๋‹ˆ๋‹ค. ๊ฐ€์‚ฌ๋ฅผ ์ž…๋ ฅํ•˜๋ฉด ๊ฐ์ •๊ณผ ์žฅ๋ฅด๋ฅผ ๋™์‹œ์— ์˜ˆ์ธกํ•ฉ๋‹ˆ๋‹ค."

# Each example is a one-element row because the interface has one input.
examples = [
    [sample_lyric]
    for sample_lyric in (
        "์Šฌํ””์˜ ๋ฐ‘๋ฐ”๋‹ฅ์—์„œ ๋‚œ ๋„ˆ๋ฅผ ๋งŒ๋‚˜",
        "๊ฐ€์Šด์ด ์›…์žฅํ•ด์ง„๋‹ค ์ด๊ฑด ๋ชป ์ฐธ์ง€",
        "๋„ˆ์™€ ํ•จ๊ป˜๋ผ๋ฉด ์–ด๋””๋“  ๊ฐˆ ์ˆ˜ ์žˆ์–ด",
        "์˜ค๋Š˜ ๋ฐค ์ฃผ์ธ๊ณต์€ ๋‚˜์•ผ ๋‚˜",
    )
]

# Input: a ten-line text box for the lyrics.
_lyrics_input = gr.Textbox(
    lines=10,
    placeholder="์—ฌ๊ธฐ์— ๋…ธ๋ž˜ ๊ฐ€์‚ฌ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”...",
    label="๋…ธ๋ž˜ ๊ฐ€์‚ฌ",
)

# Outputs: one label widget per classifier (emotion first, then genre), each
# showing the top three classes.
_result_outputs = [
    gr.Label(num_top_classes=3, label=caption)
    for caption in ("๊ฐ์ • ์˜ˆ์ธก ๊ฒฐ๊ณผ", "์žฅ๋ฅด ์˜ˆ์ธก ๊ฒฐ๊ณผ")
]

iface = gr.Interface(
    fn=predict_emotion_and_genre,
    inputs=_lyrics_input,
    outputs=_result_outputs,
    title=title,
    description=description,
    examples=examples,
)

# Start the web app as soon as the module is executed.
iface.launch()