review-analyzer / app.py
YeongMin
๋ถ„์„ ์ดˆ๊ธฐํ™” ๋ฒ„ํŠผ ์ œ๊ฑฐ
e018f90
# -*- coding: utf-8 -*-
"""
Automated review moderation service.
Uses a Hugging Face zero-shot classification model to analyse reviews in three stages.
Analysis stages:
1. Sentiment analysis: positive / neutral / negative
2. Category analysis: delivery / quality / size / exchange / service, etc.
3. Review tone detection: complaint / profanity / fake review / advertisement, etc.
"""
from transformers import pipeline
import json
from typing import List, Dict, Tuple
from datetime import datetime
import gradio as gr
class ReviewAnalyzer:
    """Analyse a review in three stages with a zero-shot classifier.

    1. Sentiment analysis: positive / neutral / negative
    2. Category analysis: delivery / quality-design / size / exchange-refund /
       service / price / function-performance
    3. Review tone detection: general / complaint / advertisement
    """
def __init__(self):
    """Load the zero-shot classifier and define the prompts for each stage.

    Every stage is described by a mapping from the full hypothesis prompt
    passed to the classifier to the short name reported in results. The
    ``*_categories`` candidate lists are the mapping keys in insertion
    order, so a prompt and its lookup key can never drift apart.
    """
    print("๋ชจ๋ธ ๋กœ๋”ฉ ์ค‘...")

    # Multilingual NLI checkpoint (the original author's note says it was
    # picked because it handles Korean well).
    self.classifier = pipeline(
        "zero-shot-classification",
        model="MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7",
    )

    # Stage 1: sentiment prompts (each prompt carries concrete examples).
    self.sentiment_mapping = {
        "์ด ๋ฆฌ๋ทฐ๋Š” ์ œํ’ˆ์ด๋‚˜ ์„œ๋น„์Šค์— ๋งŒ์กฑํ•˜๋ฉฐ ์ข‹์•„ํ•˜๊ณ  ์ถ”์ฒœํ•˜๋Š” ๊ธ์ •์ ์ธ ๊ฐ์ •์„ ํ‘œํ˜„ํ•ฉ๋‹ˆ๋‹ค. ์˜ˆ: ์ข‹์•„์š”, ๋งŒ์กฑ, ์ถ”์ฒœ, ํ›Œ๋ฅญ, ์ตœ๊ณ , ๊ฐ์‚ฌ, ๋งˆ์Œ์— ๋“ค์–ด์š”": "๊ธ์ •",
        "์ด ๋ฆฌ๋ทฐ๋Š” ์ œํ’ˆ์ด๋‚˜ ์„œ๋น„์Šค์— ๋Œ€ํ•ด ์ค‘๋ฆฝ์ ์ด๊ณ  ๊ฐ๊ด€์ ์œผ๋กœ ์‚ฌ์‹ค์ด๋‚˜ ์ƒํƒœ๋งŒ์„ ๋‚˜์—ดํ•˜๋ฉฐ ํŠน๋ณ„ํ•œ ๊ฐ์ • ํ‘œํ˜„์ด ์—†์Šต๋‹ˆ๋‹ค. ์˜ˆ: ๊ทธ๋ƒฅ ๊ทธ๋ž˜์š”, ๋ณดํ†ต, ๋ฌด๋‚œ, ํ‰๋ฒ”": "์ค‘๋ฆฝ",
        "์ด ๋ฆฌ๋ทฐ๋Š” ์ œํ’ˆ์ด๋‚˜ ์„œ๋น„์Šค์— ์‹ค๋งํ•˜๊ณ  ๋ถˆ๋งŒ์กฑ์Šค๋Ÿฌ์šด ๋ถ€์ •์ ์ธ ๊ฐ์ •์„ ํ‘œํ˜„ํ•ฉ๋‹ˆ๋‹ค. ์˜ˆ: ๋ณ„๋กœ, ์‹ค๋ง, ๋ถˆ๋งŒ์กฑ, ์ตœ์•…, ํ™”๋‚จ, ํ›„ํšŒ, ํ™˜๋ถˆ": "๋ถ€์ •"
    }
    self.sentiment_categories = list(self.sentiment_mapping)

    # Stage 2: topic/category prompts.
    self.topic_mapping = {
        "์ด ๋ฆฌ๋ทฐ๋Š” ๋ฐฐ์†ก๊ณผ ๊ด€๋ จ๋œ ๋‚ด์šฉ์„ ์–ธ๊ธ‰ํ•ฉ๋‹ˆ๋‹ค. ์˜ˆ: ๋ฐฐ์†ก ๋น ๋ฆ„, ๋ฐฐ์†ก ๋Šฆ์Œ, ํฌ์žฅ ์ƒํƒœ, ํƒ๋ฐฐ, ๋„์ฐฉ, ํŒŒ์†": "๋ฐฐ์†ก",
        "์ด ๋ฆฌ๋ทฐ๋Š” ์ œํ’ˆ ํ’ˆ์งˆ ๋˜๋Š” ๋””์ž์ธ๊ณผ ๊ด€๋ จ๋œ ๋‚ด์šฉ์„ ์–ธ๊ธ‰ํ•ฉ๋‹ˆ๋‹ค. ์˜ˆ: ์žฌ์งˆ, ๋‚ด๊ตฌ์„ฑ, ์™„์„ฑ๋„, ํ’ˆ์งˆ ์ข‹์Œ, ํ’ˆ์งˆ ๋‚˜์จ, ํŠผํŠผ, ์•ฝํ•จ, ๋””์ž์ธ, ์ƒ‰์ƒ, ์™ธ๊ด€, ์˜ˆ์จ, ์Šคํƒ€์ผ, ๋ชจ์–‘, ์ƒ‰๊น”": "ํ’ˆ์งˆ/๋””์ž์ธ",
        "์ด ๋ฆฌ๋ทฐ๋Š” ์ œํ’ˆ ์‚ฌ์ด์ฆˆ์™€ ๊ด€๋ จ๋œ ๋‚ด์šฉ์„ ์–ธ๊ธ‰ํ•ฉ๋‹ˆ๋‹ค. ์˜ˆ: ํฌ๊ธฐ, ์‚ฌ์ด์ฆˆ, ํ•, ์ž‘์Œ, ํผ, ๋”ฑ ๋งž์Œ, ์น˜์ˆ˜": "์‚ฌ์ด์ฆˆ",
        "์ด ๋ฆฌ๋ทฐ๋Š” ๊ตํ™˜/ํ™˜๋ถˆ๊ณผ ๊ด€๋ จ๋œ ๋‚ด์šฉ์„ ์–ธ๊ธ‰ํ•ฉ๋‹ˆ๋‹ค. ์˜ˆ: ๊ตํ™˜, ํ™˜๋ถˆ, ๋ฐ˜ํ’ˆ, ํ™˜๋ถˆ ์‹ ์ฒญ, ๊ตํ™˜ ์ ˆ์ฐจ": "๊ตํ™˜/ํ™˜๋ถˆ",
        "์ด ๋ฆฌ๋ทฐ๋Š” ๊ณ ๊ฐ ์„œ๋น„์Šค์™€ ๊ด€๋ จ๋œ ๋‚ด์šฉ์„ ์–ธ๊ธ‰ํ•ฉ๋‹ˆ๋‹ค. ์˜ˆ: ๊ณ ๊ฐ์„ผํ„ฐ, ์‘๋Œ€, ์ƒ๋‹ด, A/S, ์นœ์ ˆ, ๋ถˆ์นœ์ ˆ": "์„œ๋น„์Šค",
        "์ด ๋ฆฌ๋ทฐ๋Š” ๊ฐ€๊ฒฉ๊ณผ ๊ด€๋ จ๋œ ๋‚ด์šฉ์„ ์–ธ๊ธ‰ํ•ฉ๋‹ˆ๋‹ค. ์˜ˆ: ๊ฐ€๊ฒฉ, ๊ฐ€์„ฑ๋น„, ๋น„์Œˆ, ์ €๋ ด, ํ• ์ธ, ๋น„์šฉ, ๋ˆ": "๊ฐ€๊ฒฉ",
        "์ด ๋ฆฌ๋ทฐ๋Š” ์ œํ’ˆ ๊ธฐ๋Šฅ/์„ฑ๋Šฅ๊ณผ ๊ด€๋ จ๋œ ๋‚ด์šฉ์„ ์–ธ๊ธ‰ํ•ฉ๋‹ˆ๋‹ค. ์˜ˆ: ๊ธฐ๋Šฅ, ์„ฑ๋Šฅ, ์ž‘๋™, ํšจ๊ณผ, ์‚ฌ์šฉ๊ฐ, ํŽธ๋ฆฌํ•จ": "๊ธฐ๋Šฅ/์„ฑ๋Šฅ"
    }
    self.topic_categories = list(self.topic_mapping)

    # Stage 3: tone prompts (the "general" prompt is listed first).
    self.tone_mapping = {
        "์ด ๋ฆฌ๋ทฐ๋Š” ์ œํ’ˆ์— ๋Œ€ํ•œ ์†”์งํ•œ ๊ฐ์ƒ๊ณผ ํ‰๊ฐ€๋ฅผ ๋‹ด๊ณ  ์žˆ์œผ๋ฉฐ, ๊ธ์ •์ ์ด๋“  ๋ถ€์ •์ ์ด๋“  ์ง„์‹ค๋œ ์‚ฌ์šฉ ๊ฒฝํ—˜์„ ๊ณต์œ ํ•ฉ๋‹ˆ๋‹ค": "์ผ๋ฐ˜",
        "์ด ๋ฆฌ๋ทฐ๋Š” ์ œํ’ˆ์˜ ๊ฒฐํ•จ, ๋ฐฐ์†ก์ง€์—ฐ, ์„œ๋น„์Šค ๋ฌธ์ œ ๋“ฑ ๋ช…๋ฐฑํ•œ ๋ถˆ๋งŒ์‚ฌํ•ญ์„ ์–ธ๊ธ‰ํ•˜๋ฉฐ ๋ถ€์ •์ ์ธ ๊ฒฝํ—˜์„ ํ‘œํ˜„ํ•ฉ๋‹ˆ๋‹ค": "๋ถˆ๋งŒ",
        "์ด ๋ฆฌ๋ทฐ๋Š” ํ…”๋ ˆ๊ทธ๋žจ, ์นด์นด์˜คํ†ก ๋“ฑ ๋ฉ”์‹ ์ € ์•„์ด๋””(@๋กœ ์‹œ์ž‘), ์ „ํ™”๋ฒˆํ˜ธ, ์ด๋ฉ”์ผ ๊ฐ™์€ ์—ฐ๋ฝ์ฒ˜๋ฅผ ํฌํ•จํ•˜๊ฑฐ๋‚˜, '์—ฐ๋ฝ์ฃผ์„ธ์š”', '๋„๋งค๊ฐ€', '๋ฐ˜๊ฐ’', 'ํ• ์ธ', '์ฟ ํฐ' ๋“ฑ์œผ๋กœ ๋‹ค๋ฅธ ํŒ๋งค์ฒ˜๋‚˜ ๊ฑฐ๋ž˜๋ฅผ ์œ ๋„ํ•˜๋Š” ๋ช…๋ฐฑํ•œ ๊ด‘๊ณ /์ŠคํŒธ ๋‚ด์šฉ์ž…๋‹ˆ๋‹ค": "๊ด‘๊ณ "
    }
    self.tone_categories = list(self.tone_mapping)

    print("๋ชจ๋ธ ๋กœ๋”ฉ ์™„๋ฃŒ!")
    print("โœ“ 3๋‹จ๊ณ„ ๋ถ„์„ ๋ชจ๋“œ ํ™œ์„ฑํ™” (๊ฐ์ • โ†’ ์นดํ…Œ๊ณ ๋ฆฌ โ†’ ํ†ค)")
def preprocess_text(self, text: str) -> str:
    """Normalise the whitespace of a review before it is analysed.

    Args:
        text: Raw review text.

    Returns:
        The text with leading and trailing whitespace removed and every
        run of whitespace collapsed into a single space.
    """
    import re

    trimmed = text.strip()
    return re.sub(r'\s+', ' ', trimmed)
def split_into_sentences(self, text: str) -> List[str]:
    """Split text into sentence-like fragments.

    Fragments are delimited by runs of ``.``, ``!``, ``?`` or ``~``
    together with any whitespace that follows them. A fragment that is two
    characters or shorter after trimming is discarded.

    Args:
        text: Text to split.

    Returns:
        The surviving fragments, or ``[text]`` when none survive.
    """
    import re

    trimmed_parts = (part.strip() for part in re.split(r'[.!?~]+\s*', text))
    fragments = [part for part in trimmed_parts if len(part) > 2]
    return fragments or [text]
def analyze_sentiment(self, text: str, use_sentence_split: bool = True) -> Dict:
    """Stage 1: classify the review's sentiment (single-label).

    A text longer than 100 characters that splits into more than one
    sentence is classified sentence by sentence and the per-sentiment
    scores are averaged; otherwise the whole text is classified once.

    Args:
        text: Review text.
        use_sentence_split: Allow the per-sentence path for long texts.

    Returns:
        Dict with ``sentiment`` (winning short name), ``confidence``
        (percent), ``scores`` (short name -> percent) and ``method``
        (``"sentence_split"`` or ``"single"``).
    """
    if use_sentence_split and len(text) > 100:
        sentences = self.split_into_sentences(text)
        if len(sentences) > 1:
            # Gather every sentence's score under its short sentiment name.
            per_sentiment = {name: [] for name in self.sentiment_mapping.values()}
            for sentence in sentences:
                outcome = self.classifier(
                    sentence,
                    self.sentiment_categories,
                    multi_label=False,
                )
                for prompt, prob in zip(outcome['labels'], outcome['scores']):
                    per_sentiment[self.sentiment_mapping[prompt]].append(prob)

            averages = {}
            for name, probs in per_sentiment.items():
                averages[name] = sum(probs) / len(probs) if probs else 0

            best_name, best_average = max(averages.items(), key=lambda pair: pair[1])
            return {
                "sentiment": best_name,
                "confidence": round(best_average * 100, 2),
                "scores": {
                    name: round(average * 100, 2)
                    for name, average in averages.items()
                },
                "method": "sentence_split",
            }

    # Short text, splitting disabled, or only one usable sentence.
    outcome = self.classifier(text, self.sentiment_categories, multi_label=False)
    prompts, probs = outcome['labels'], outcome['scores']
    return {
        "sentiment": self.sentiment_mapping[prompts[0]],
        "confidence": round(probs[0] * 100, 2),
        "scores": {
            self.sentiment_mapping[prompt]: round(prob * 100, 2)
            for prompt, prob in zip(prompts, probs)
        },
        "method": "single",
    }
def analyze_category(self, text: str, top_k: int = 3, use_sentence_split: bool = True, min_threshold: float = 0.25) -> Dict:
    """Stage 2: score the review against every topic category (multi-label).

    A text longer than 100 characters that splits into more than one
    sentence is classified sentence by sentence and each category keeps its
    highest per-sentence score; otherwise the whole text is classified once.

    Args:
        text: Review text.
        top_k: Number of leading candidates considered for
            ``main_categories`` (highest-scoring ones on the per-sentence
            path, the first entries of the classifier output otherwise).
        use_sentence_split: Allow the per-sentence path for long texts.
        min_threshold: Minimum score (0-1) a considered candidate needs to
            be reported.

    Returns:
        Dict with ``main_categories`` (list of ``{"category",
        "confidence"}`` entries, confidence in percent), ``all_scores``
        (category -> percent) and ``method`` (``"sentence_split"`` or
        ``"single"``).
    """
    def as_percent(value):
        return round(value * 100, 2)

    if use_sentence_split and len(text) > 100:
        sentences = self.split_into_sentences(text)
        if len(sentences) > 1:
            per_category = {name: [] for name in self.topic_mapping.values()}
            for sentence in sentences:
                outcome = self.classifier(
                    sentence,
                    self.topic_categories,
                    multi_label=True,
                )
                for prompt, prob in zip(outcome['labels'], outcome['scores']):
                    per_category[self.topic_mapping[prompt]].append(prob)

            # A category counts if any single sentence scored it highly.
            peak_scores = {
                name: max(probs) if probs else 0
                for name, probs in per_category.items()
            }
            ranking = sorted(peak_scores.items(), key=lambda pair: pair[1], reverse=True)
            return {
                "main_categories": [
                    {"category": name, "confidence": as_percent(peak)}
                    for name, peak in ranking[:top_k]
                    if peak >= min_threshold
                ],
                "all_scores": {name: as_percent(peak) for name, peak in ranking},
                "method": "sentence_split",
            }

    # Whole-text classification; several categories may apply at once.
    outcome = self.classifier(text, self.topic_categories, multi_label=True)
    ranked = list(zip(outcome['labels'], outcome['scores']))
    return {
        "main_categories": [
            {"category": self.topic_mapping[prompt], "confidence": as_percent(prob)}
            # max(..., 0): a non-positive top_k selects nothing on this path.
            for prompt, prob in ranked[:max(top_k, 0)]
            if prob >= min_threshold
        ],
        "all_scores": {
            self.topic_mapping[prompt]: as_percent(prob)
            for prompt, prob in ranked
        },
        "method": "single",
    }
def analyze_tone(self, text: str) -> Dict:
    """Stage 3: classify the tone of the review (single-label).

    The whole text is classified once against ``self.tone_categories``.

    Args:
        text: Review text.

    Returns:
        Dict with ``tone`` (short name of the first label returned by the
        classifier), ``confidence`` (percent) and ``scores`` (short name ->
        percent).
    """
    outcome = self.classifier(text, self.tone_categories, multi_label=False)
    prompts, probs = outcome['labels'], outcome['scores']

    percentages = {}
    for prompt, prob in zip(prompts, probs):
        percentages[self.tone_mapping[prompt]] = round(prob * 100, 2)

    return {
        "tone": self.tone_mapping[prompts[0]],
        "confidence": round(probs[0] * 100, 2),
        "scores": percentages,
    }
def generate_rating_from_sentiment(self, category: str, confidence: float, sentiment: str) -> int:
    """Turn a sentiment label and its confidence into a star rating.

    The label selects a base score (4.5 for positive, 3.0 for neutral, 1.5
    for anything else). The base is blended with the midpoint 2.5 in
    proportion to the confidence, clamped to the 1-5 range and rounded half
    up. At full confidence this yields 5 / 3 / 2 stars, the same values
    ``generate_comprehensive_analysis`` assigns per sentiment.

    Args:
        category: Category name (kept for interface compatibility; unused).
        confidence: Confidence as a percentage (0-100).
        sentiment: Sentiment label; any value other than the positive or
            neutral label is treated as negative.

    Returns:
        Integer star rating.
    """
    if sentiment == "๊ธ์ •":
        base_score = 4.5
    elif sentiment == "์ค‘๋ฆฝ":
        base_score = 3.0
    else:  # negative
        base_score = 1.5

    # Low confidence pulls the score towards the 2.5 midpoint.
    weight = confidence / 100.0
    blended = base_score * weight + 2.5 * (1 - weight)
    clamped = max(1, min(5, blended))

    # Round half up. The built-in round() rounds halves to the nearest even
    # integer, which turned 4.5 into 4 (five stars were unreachable) and
    # the 2.5 midpoint into 2.
    return int(clamped + 0.5)
def extract_evidence_from_text(self, text: str, category: str, sentiment: str = None) -> str:
    """Pick a short quote from the review that supports a category.

    The text is cut into chunks at commas and at a few conjunctions. Only
    chunks longer than five characters that contain one of the category's
    keywords are candidates. When ``sentiment`` is given, a candidate whose
    sentiment keywords agree with it is preferred; otherwise the first
    candidate is used.

    Args:
        text: Review text.
        category: Category name (a key of the keyword table below).
        sentiment: Sentiment label for that category; a falsy value
            disables the sentiment preference.

    Returns:
        The chosen chunk cut to at most 20 characters and wrapped in double
        quotes, or ``"-"`` for an unknown category or when no chunk
        qualifies.
    """
    import re

    # Keywords that tie a chunk to each category.
    keywords = {
        "๋ฐฐ์†ก": ["๋ฐฐ์†ก", "ํƒ๋ฐฐ", "๋„์ฐฉ", "ํฌ์žฅ"],
        "ํ’ˆ์งˆ/๋””์ž์ธ": ["ํ’ˆ์งˆ", "์žฌ์งˆ", "ํŠผํŠผ", "๋‚ด๊ตฌ", "์™„์„ฑ๋„", "ํ„ธ๋น ์ง", "๋น ์ง", "๋””์ž์ธ", "์ƒ‰์ƒ", "์Šคํƒ€์ผ", "์™ธ๊ด€"],
        "์‚ฌ์ด์ฆˆ": ["์‚ฌ์ด์ฆˆ", "ํฌ๊ธฐ", "ํ•", "์น˜์ˆ˜"],
        "๊ตํ™˜/ํ™˜๋ถˆ": ["๊ตํ™˜", "ํ™˜๋ถˆ", "๋ฐ˜ํ’ˆ"],
        "์„œ๋น„์Šค": ["์„œ๋น„์Šค", "๊ณ ๊ฐ์„ผํ„ฐ", "์‘๋Œ€", "์นœ์ ˆ"],
        "๊ฐ€๊ฒฉ": ["๊ฐ€๊ฒฉ", "๊ฐ€์„ฑ๋น„", "๋น„์‹ธ", "์ €๋ ด", "ํ• ์ธ", "๋ˆ"],
        "๊ธฐ๋Šฅ/์„ฑ๋Šฅ": ["๊ธฐ๋Šฅ", "์„ฑ๋Šฅ", "์ž‘๋™", "ํšจ๊ณผ", "์‚ฌ์šฉ"]
    }
    topic_terms = keywords.get(category)
    if topic_terms is None:
        return "-"

    # Sentiment keywords used to judge whether a chunk agrees with `sentiment`.
    positive_keywords = ["์ข‹", "ํ›Œ๋ฅญ", "๋งŒ์กฑ", "์ตœ๊ณ ", "์˜ˆ์˜", "์ด์˜", "๋”ฑ๋งž", "๋น ๋ฅด", "๊ดœ์ฐฎ", "์™„๋ฒฝ", "๋ฉ‹์ง€", "๊ฐ์‚ฌ"]
    negative_keywords = ["๋ณ„๋กœ", "์•„์‰ฝ", "์‹ค๋ง", "์ตœ์•…", "์งœ์ฆ", "๋ฌธ์ œ", "๋‚˜์˜", "ํ˜•ํŽธ์—†", "์—‰๋ง", "ํ›„ํšŒ", "๋‹ค๋ฅด", "์•ˆ", "๋ชป", "๋ณต์žก"]

    # Each candidate is (chunk, agrees_with_sentiment).
    candidates = []
    for raw_piece in re.split(r'[,]|\s+๊ทธ๋ฆฌ๊ณ \s+|\s+๊ทผ๋ฐ\s+|\s+ํ•˜์ง€๋งŒ\s+|\s+์ธ๋ฐ\s+', text):
        piece = raw_piece.strip()
        if len(piece) <= 5 or not any(term in piece for term in topic_terms):
            continue
        if not sentiment:
            # No preference requested: every category chunk qualifies.
            candidates.append((piece, True))
            continue
        lowered = piece.lower()
        has_positive = any(kw in lowered for kw in positive_keywords)
        has_negative = any(kw in lowered for kw in negative_keywords)
        agrees = bool(
            (sentiment == "๊ธ์ •" and has_positive and not has_negative)
            or (sentiment == "๋ถ€์ •" and has_negative)
        )
        candidates.append((piece, agrees))

    # Prefer the first agreeing chunk; fall back to the first candidate.
    chosen = next((piece for piece, agrees in candidates if agrees), None)
    if chosen is None and candidates:
        chosen = candidates[0][0]
    if chosen is None:
        return "-"
    return f'"{chosen[:20]}"'
def analyze_sentiment_for_category(self, text: str, category: str) -> str:
    """Judge the sentiment expressed about one category using keywords.

    The text is cut into chunks at commas and at a few conjunctions. Chunks
    that contain a keyword of the category are inspected in order: the
    first one holding a negative keyword yields the negative label, and one
    holding a positive keyword (and no negative keyword) yields the
    positive label. Negative keywords are checked first.

    Args:
        text: Review text.
        category: Category name (a key of the keyword table below).

    Returns:
        The positive, negative or neutral label; neutral for an unknown
        category or when no inspected chunk contains a sentiment keyword.
    """
    import re

    # Keywords that tie a chunk to each category.
    keywords = {
        "๋ฐฐ์†ก": ["๋ฐฐ์†ก", "ํƒ๋ฐฐ", "๋„์ฐฉ", "ํฌ์žฅ"],
        "ํ’ˆ์งˆ/๋””์ž์ธ": ["ํ’ˆ์งˆ", "์žฌ์งˆ", "ํŠผํŠผ", "๋‚ด๊ตฌ", "์™„์„ฑ๋„", "ํ„ธ๋น ์ง", "๋น ์ง", "๋””์ž์ธ", "์ƒ‰์ƒ", "์Šคํƒ€์ผ", "์™ธ๊ด€"],
        "์‚ฌ์ด์ฆˆ": ["์‚ฌ์ด์ฆˆ", "ํฌ๊ธฐ", "ํ•", "์น˜์ˆ˜"],
        "๊ตํ™˜/ํ™˜๋ถˆ": ["๊ตํ™˜", "ํ™˜๋ถˆ", "๋ฐ˜ํ’ˆ"],
        "์„œ๋น„์Šค": ["์„œ๋น„์Šค", "๊ณ ๊ฐ์„ผํ„ฐ", "์‘๋Œ€", "์นœ์ ˆ"],
        "๊ฐ€๊ฒฉ": ["๊ฐ€๊ฒฉ", "๊ฐ€์„ฑ๋น„", "๋น„์‹ธ", "์ €๋ ด", "ํ• ์ธ", "๋ˆ"],
        "๊ธฐ๋Šฅ/์„ฑ๋Šฅ": ["๊ธฐ๋Šฅ", "์„ฑ๋Šฅ", "์ž‘๋™", "ํšจ๊ณผ", "์‚ฌ์šฉ"]
    }
    negative_keywords = ["๋ณ„๋กœ", "์•„์‰ฝ", "์‹ค๋ง", "์ตœ์•…", "์งœ์ฆ", "๋ฌธ์ œ", "๋‚˜์˜", "ํ˜•ํŽธ์—†", "์—‰๋ง", "ํ›„ํšŒ", "๋‹ค๋ฅด", "์•ˆ", "๋ชป"]
    positive_keywords = ["์ข‹", "ํ›Œ๋ฅญ", "๋งŒ์กฑ", "์ตœ๊ณ ", "์˜ˆ์˜", "์ด์˜", "๋”ฑ๋งž", "๋น ๋ฅด", "๊ดœ์ฐฎ", "์™„๋ฒฝ", "๋ฉ‹์ง€", "๊ฐ์‚ฌ"]

    topic_terms = keywords.get(category)
    if topic_terms is None:
        return "์ค‘๋ฆฝ"

    for piece in re.split(r'[,]|\s+๊ทธ๋ฆฌ๊ณ \s+|\s+๊ทผ๋ฐ\s+|\s+ํ•˜์ง€๋งŒ\s+|\s+์ธ๋ฐ\s+', text):
        # Only chunks that mention this category are judged.
        if not any(term in piece for term in topic_terms):
            continue
        lowered = piece.lower()
        if any(term in lowered for term in negative_keywords):
            return "๋ถ€์ •"
        if any(term in lowered for term in positive_keywords):
            return "๊ธ์ •"

    # Nothing decisive was found.
    return "์ค‘๋ฆฝ"
def extract_tone_evidence(self, text: str) -> Dict[str, str]:
    """Collect short quotes that show positive and negative wording.

    The text is cut into chunks at commas, full stops and a few
    conjunctions. Chunks shorter than three characters are ignored. A chunk
    containing a positive keyword is quoted as positive evidence and a
    chunk containing a negative keyword as negative evidence; one chunk can
    appear in both.

    Args:
        text: Review text.

    Returns:
        ``{"positive": ..., "negative": ...}`` where each value joins up to
        two quotes (each cut to 20 characters) with ``", "``, or is ``"-"``
        when there is no evidence.
    """
    import re

    positive_keywords = ["์ข‹", "ํ›Œ๋ฅญ", "๋งŒ์กฑ", "์ตœ๊ณ ", "์˜ˆ์˜", "์ด์˜", "๋”ฑ๋งž", "๋น ๋ฅด", "๊ดœ์ฐฎ", "์™„๋ฒฝ", "๋ฉ‹์ง€", "๊ฐ์‚ฌ"]
    negative_keywords = ["๋ณ„๋กœ", "์•„์‰ฝ", "์‹ค๋ง", "์ตœ์•…", "์งœ์ฆ", "๋ฌธ์ œ", "๋‚˜์˜", "ํ˜•ํŽธ์—†", "์—‰๋ง", "ํ›„ํšŒ", "๋‹ค๋ฅด", "๋ณต์žก", "๋ถˆํŽธ","๋А๋ฆฌ", "๋А๋ฆผ", "๋Šฆ", "์ง€์—ฐ"]

    def quoted(fragment):
        # Cut to 20 characters and wrap in double quotes.
        return f'"{fragment[:20]}"'

    positive_evidence = []
    negative_evidence = []
    for raw_piece in re.split(r'[,.]|\s+๊ทธ๋ฆฌ๊ณ \s+|\s+๊ทผ๋ฐ\s+|\s+ํ•˜์ง€๋งŒ\s+|\s+์ธ๋ฐ\s+', text):
        piece = raw_piece.strip()
        if len(piece) < 3:
            continue
        lowered = piece.lower()
        if any(keyword in lowered for keyword in positive_keywords):
            positive_evidence.append(quoted(piece))
        if any(keyword in lowered for keyword in negative_keywords):
            negative_evidence.append(quoted(piece))

    # Show at most two quotes per side; a quote is never empty, so an empty
    # join means there was no evidence.
    return {
        "positive": ", ".join(positive_evidence[:2]) or "-",
        "negative": ", ".join(negative_evidence[:2]) or "-",
    }
def generate_comprehensive_analysis(self, review_text: str, analysis_result: Dict) -> Dict:
    """Build the per-category evaluation and overall summary for a review.

    Every known category is checked with the keyword-based helpers,
    independently of the classifier's category result. A category is
    reported only when a supporting quote can be extracted for it.

    Args:
        review_text: Original review text.
        analysis_result: Stage results; only
            ``analysis_result['sentiment']`` (its ``sentiment`` and
            ``scores`` entries) is read.

    Returns:
        Dict with ``item_ratings`` (list of ``{"category", "rating",
        "evidence", "sentiment"}``), ``tone_ratio`` (rounded positive /
        negative / neutral sentiment percentages), ``tone_evidence``,
        ``summary`` and ``overall_sentiment``.
    """
    overall_sentiment = analysis_result['sentiment']['sentiment']
    sentiment_scores = analysis_result['sentiment']['scores']

    # Inspect every category, regardless of what the classifier reported.
    all_possible_categories = ["๋ฐฐ์†ก", "ํ’ˆ์งˆ/๋””์ž์ธ", "์‚ฌ์ด์ฆˆ", "๊ตํ™˜/ํ™˜๋ถˆ", "์„œ๋น„์Šค", "๊ฐ€๊ฒฉ", "๊ธฐ๋Šฅ/์„ฑ๋Šฅ"]

    item_ratings = []
    for category in all_possible_categories:
        # Sentiment first, so the quote can be chosen to agree with it.
        category_sentiment = self.analyze_sentiment_for_category(review_text, category)
        evidence = self.extract_evidence_from_text(review_text, category, category_sentiment)

        # No quote means the review does not cover this category.
        if evidence == "-":
            continue

        # Stars per sentiment: negative 2, positive 5, anything else 3.
        if category_sentiment == "๋ถ€์ •":
            rating = 2
        elif category_sentiment == "๊ธ์ •":
            rating = 5
        else:
            rating = 3

        item_ratings.append({
            "category": category,
            "rating": rating,
            "evidence": evidence,
            "sentiment": category_sentiment
        })

    tone_evidence = self.extract_tone_evidence(review_text)
    summary = self.generate_summary_sentence(review_text, item_ratings, overall_sentiment)

    return {
        "item_ratings": item_ratings,
        # A missing sentiment score counts as 0.
        "tone_ratio": {
            "positive": round(sentiment_scores.get('๊ธ์ •', 0)),
            "negative": round(sentiment_scores.get('๋ถ€์ •', 0)),
            "neutral": round(sentiment_scores.get('์ค‘๋ฆฝ', 0))
        },
        "tone_evidence": tone_evidence,
        "summary": summary,
        "overall_sentiment": overall_sentiment
    }
def generate_summary_sentence(self, review_text: str, item_ratings: List[Dict], sentiment: str) -> str:
    """Compose a one-sentence summary from the per-category ratings.

    Categories rated 4 or higher count as strengths and those rated 2 or
    lower as weaknesses. The sentence names up to two of each when both
    exist, up to three when only one side exists, and falls back to a
    generic sentence chosen by the overall sentiment when neither exists.

    Args:
        review_text: Original review (not used by the current wording).
        item_ratings: Per-category entries with ``category`` and ``rating``.
        sentiment: Overall sentiment label, used only for the fallback.

    Returns:
        The summary sentence.
    """
    def names(items, limit):
        # Comma-separated category names of the first `limit` items.
        return ", ".join([item['category'] for item in items[:limit]])

    high_rated = [item for item in item_ratings if item['rating'] >= 4]
    low_rated = [item for item in item_ratings if item['rating'] <= 2]

    # Both strengths and weaknesses.
    if high_rated and low_rated:
        high_cats = names(high_rated, 2)
        low_cats = names(low_rated, 2)
        return f"{high_cats}์€(๋Š”) ์ข‹์ง€๋งŒ, {low_cats} ๋ถ€๋ถ„์ด ์•„์‰ฌ์šด ์ œํ’ˆ์ด์—์š”."

    # Strengths only.
    if high_rated:
        high_cats = names(high_rated, 3)
        return f"{high_cats} ๋ชจ๋‘ ๋งŒ์กฑ์Šค๋Ÿฌ์šด ์ œํ’ˆ์ด์—์š”."

    # Weaknesses only.
    if low_rated:
        low_cats = names(low_rated, 3)
        return f"{low_cats} ๋ถ€๋ถ„์ด ๊ธฐ๋Œ€์— ๋ชป ๋ฏธ์น˜๋Š” ์ œํ’ˆ์ด์—์š”."

    # Nothing stands out: fall back on the overall sentiment.
    if sentiment == "๊ธ์ •":
        return "์ „๋ฐ˜์ ์œผ๋กœ ๋งŒ์กฑ์Šค๋Ÿฌ์šด ์ œํ’ˆ์ด์—์š”."
    if sentiment == "๋ถ€์ •":
        return "์ „๋ฐ˜์ ์œผ๋กœ ์•„์‰ฌ์›€์ด ๋‚จ๋Š” ์ œํ’ˆ์ด์—์š”."
    return "๋ฌด๋‚œํ•œ ์ˆ˜์ค€์˜ ์ œํ’ˆ์ด์—์š”."
def analyze_review(self, review_text: str, include_comprehensive: bool = True) -> Dict:
    """Run the three analysis stages on one review.

    The stages run on the whitespace-normalised text; the comprehensive
    analysis, when requested, receives the original text.

    Args:
        review_text: Review text to analyse.
        include_comprehensive: Also attach the comprehensive analysis under
            the ``comprehensive`` key.

    Returns:
        Dict with ``review`` (original text), ``sentiment``,
        ``categories``, ``tone``, ``timestamp`` (ISO-formatted local time)
        and optionally ``comprehensive``.
    """
    cleaned = self.preprocess_text(review_text)

    # Stages run in order: sentiment, category, tone.
    result = {
        "review": review_text,
        "sentiment": self.analyze_sentiment(cleaned),
        "categories": self.analyze_category(cleaned),
        "tone": self.analyze_tone(cleaned),
        "timestamp": datetime.now().isoformat(),
    }
    if not include_comprehensive:
        return result

    result["comprehensive"] = self.generate_comprehensive_analysis(review_text, result)
    return result
def analyze_reviews(self, reviews: List[str]) -> List[Dict]:
    """Analyse several reviews one after another.

    A progress line is printed before each review is analysed.

    Args:
        reviews: Review texts to analyse.

    Returns:
        One ``analyze_review`` result per input, in input order.
    """
    collected = []
    for position, review in enumerate(reviews, start=1):
        print(f"\n[{position}/{len(reviews)}] ๋ถ„์„ ์ค‘...")
        collected.append(self.analyze_review(review))
    return collected
def print_results(self, results: List[Dict]) -> None:
    """Print a human-readable report of analysis results to stdout.

    Args:
        results: Result dicts. Each is read for ``review``, ``sentiment``
            (``sentiment`` / ``confidence``), ``categories``
            (``main_categories`` entries with ``category`` /
            ``confidence``) and ``tone`` (``tone`` / ``confidence``).
    """
    # Report banner.
    print("\n" + "="*80)
    print("๋ฆฌ๋ทฐ 3๋‹จ๊ณ„ ๋ถ„์„ ๊ฒฐ๊ณผ")
    print("="*80)
    for idx, result in enumerate(results, 1):
        print(f"\n[๋ฆฌ๋ทฐ #{idx}]")
        print(f"๋‚ด์šฉ: {result['review']}")
        print(f"\n1๏ธโƒฃ ๊ฐ์ •: {result['sentiment']['sentiment']} ({result['sentiment']['confidence']}%)")
        # Categories are shown as "name (confidence%)", comma separated.
        categories_str = ', '.join([f"{c['category']} ({c['confidence']}%)" for c in result['categories']['main_categories']])
        print(f"2๏ธโƒฃ ์นดํ…Œ๊ณ ๋ฆฌ: {categories_str}")
        print(f"3๏ธโƒฃ ํ†ค: {result['tone']['tone']} ({result['tone']['confidence']}%)")
    # NOTE(review): placed after the loop as a closing rule; the source view
    # had lost its indentation, so confirm it was not a per-review separator.
    print("\n" + "="*80)
def save_results(self, results: List[Dict], filename: str = "review_results.json"):
    """Write analysis results to a JSON file and print a confirmation.

    The file is written as UTF-8 with non-ASCII characters kept as-is and
    two-space indentation.

    Args:
        results: JSON-serialisable result dicts.
        filename: Destination path; an existing file is overwritten.
    """
    with open(filename, 'w', encoding='utf-8') as handle:
        json.dump(results, handle, ensure_ascii=False, indent=2)
    print(f"\n๊ฒฐ๊ณผ๊ฐ€ {filename}์— ์ €์žฅ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
def load_reviews_from_csv(self, csv_file: str) -> List[str]:
    """Read review texts from the ``review_text`` column of a CSV file.

    Args:
        csv_file: Path to a UTF-8 CSV file (with or without a byte-order
            mark) whose first row is a header.

    Returns:
        The ``review_text`` value of every data row, in file order.

    Raises:
        KeyError: If a row has no ``review_text`` column.
        OSError: If the file cannot be opened.
    """
    # Imported here because the module-level imports do not include csv;
    # without it every call failed with NameError.
    import csv

    # newline='' lets the csv module handle line breaks inside quoted
    # fields itself; utf-8-sig drops a leading BOM, which would otherwise
    # become part of the first header name.
    with open(csv_file, 'r', encoding='utf-8-sig', newline='') as handle:
        return [row['review_text'] for row in csv.DictReader(handle)]
def analyze_for_gradio(self, review_text: str):
    """Analyse one review and shape the result for the Gradio outputs.

    The comprehensive analysis is skipped; only the three stages run.

    Args:
        review_text: Review text entered by the user.

    Returns:
        A 6-tuple: sentiment text, category text, tone text, and three
        dicts mapping names to scores in the 0-1 range (sentiment,
        category, tone). For empty or whitespace-only input the first
        element is a warning message, the other texts are empty and the
        dicts are empty.
    """
    if not review_text or not review_text.strip():
        return "โš ๏ธ ๋ฆฌ๋ทฐ๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”", "", "", {}, {}, {}

    def to_fractions(percentages):
        # The analysis stores percentages; the outputs take 0-1 values.
        return {name: value / 100.0 for name, value in percentages.items()}

    result = self.analyze_review(review_text, include_comprehensive=False)
    sentiment_part = result['sentiment']
    category_part = result['categories']
    tone_part = result['tone']

    sentiment_output = f"{sentiment_part['sentiment']} ({sentiment_part['confidence']}%)"

    # One bullet line per reported category, or a placeholder message.
    categories = category_part['main_categories']
    if categories:
        category_output = "\n".join(f"โ€ข {c['category']}" for c in categories)
    else:
        category_output = "ํ•ด๋‹น ์นดํ…Œ๊ณ ๋ฆฌ ์—†์Œ"

    tone_output = f"{tone_part['tone']} ({tone_part['confidence']}%)"

    return (
        sentiment_output,
        category_output,
        tone_output,
        to_fractions(sentiment_part['scores']),
        to_fractions(category_part['all_scores']),
        to_fractions(tone_part['scores']),
    )
def format_comprehensive_analysis(self, comprehensive: Dict) -> str:
    """Render a comprehensive analysis as a Markdown table.

    The table has one row per rated category (name, sentiment, one star
    per rating point, evidence) followed by an overall-tone row.

    Args:
        comprehensive: Dict with ``item_ratings`` and ``tone_ratio``
            (``positive`` / ``negative``); ``tone_evidence`` is optional
            and defaults to ``"-"`` on both sides.

    Returns:
        The Markdown table; every row ends with a newline.
    """
    rows = [
        "| ํ•ญ๋ชฉ | ๊ฐ์ • | ๋งŒ์กฑ๋„ | ๊ทผ๊ฑฐ |\n",
        "|------|------|--------|------|\n",
    ]

    # One row per rated category.
    for item in comprehensive['item_ratings']:
        stars = "โญ๏ธ" * item['rating']
        sentiment = item.get('sentiment', '์ค‘๋ฆฝ')
        rows.append(f"| {item['category']} | {sentiment} | {stars} | {item['evidence']} |\n")

    tone_ratio = comprehensive['tone_ratio']
    tone_evidence = comprehensive.get('tone_evidence', {"positive": "-", "negative": "-"})

    # One side dominates only when it leads by more than 20 points.
    if tone_ratio['positive'] > tone_ratio['negative'] + 20:
        tone_summary = "๊ธ์ •์ด ์šฐ์„ธํ•จ"
    elif tone_ratio['negative'] > tone_ratio['positive'] + 20:
        tone_summary = "๋ถ€์ •์ด ์šฐ์„ธํ•จ"
    else:
        tone_summary = "๊ธ์ •๊ณผ ๋ถ€์ •์ด ํ˜ผ์žฌ๋จ"

    # Overall-tone row, with the positive and negative evidence side by side.
    tone_evidence_text = f"๊ธ์ •: {tone_evidence['positive']} / ๋ถ€์ •: {tone_evidence['negative']}"
    rows.append(f"| ์ „์ฒด ํ†ค | {tone_summary} | ๊ธ์ • {tone_ratio['positive']} : ๋ถ€์ • {tone_ratio['negative']} | {tone_evidence_text} |\n")
    return "".join(rows)
# Module-wide analyzer instance; created on first use so that the model is
# loaded only once.
analyzer = None


def get_analyzer():
    """Return the shared ReviewAnalyzer, creating it on the first call."""
    global analyzer
    if analyzer is not None:
        return analyzer
    analyzer = ReviewAnalyzer()
    return analyzer
def create_gradio_app():
    """Build the Gradio web application.

    Lays out a two-column dashboard (review input with examples on one
    side, three result cards on the other) and connects both the submit
    button and the textbox's submit event to
    ``ReviewAnalyzer.analyze_for_gradio``.

    Returns:
        The ``gr.Blocks`` instance, not yet launched.
    """
    # NOTE(review): the nesting of the layout context managers below was
    # reconstructed from the statement order and the section comments; the
    # source view had lost its indentation. Confirm against the repository.

    # Obtain the shared analyzer.
    review_analyzer = get_analyzer()
    # Sample reviews offered as one-click examples.
    examples = [
        ["์ •๋ง ์ข‹์€ ์ œํ’ˆ์ด์—์š”! ๋ฐฐ์†ก๋„ ๋น ๋ฅด๊ณ  ํ’ˆ์งˆ๋„ ํ›Œ๋ฅญํ•ฉ๋‹ˆ๋‹ค. ๋‹ค์Œ์—๋„ ๋˜ ๊ตฌ๋งคํ• ๊ฒŒ์š”!"],
        ["์™„์ „ ์‹ค๋ง์ด์—์š”. ์‚ฌ์ง„์ด๋ž‘ ์™„์ „ ๋‹ค๋ฅด๊ณ  ํ’ˆ์งˆ๋„ ๋ณ„๋กœ์ž…๋‹ˆ๋‹ค. ํ™˜๋ถˆ ์‹ ์ฒญํ–ˆ์Šต๋‹ˆ๋‹ค. ๋‹ค๋งŒ ํ™˜๋ถˆ ์ฒ˜๋ฆฌ๋Š” ๋นจ๋ผ์„œ ์ข‹์•˜์–ด์š”."],
        ["ํ•๋„ ๋„˜์ด์˜๊ณ  ์‚ฌ์ด์ฆˆ๋„ ๋”ฑ๋งž๊ณ  ๋‹ค์ข‹์€๋ฐ ํ„ธ๋น ์ง์ด ์žฅ๋‚œ์ด ์•„๋‹ˆ์˜ˆ์š”~~๊ฐ์ˆ˜ํ• ๋งŒํ•œ๋ฐ ์€๊ทผ ์งœ์ฆ๋‚ ์ˆ˜๋„? ๊ทธ๋ƒฅ ์ž…์œผ๋ฉด ๊ณ ์–‘์ด๋งˆ๋ƒฅ ํ„ธ์„ ๋ฟœ๋‚ด์š” ใ…Žใ…Ž"],
        ["ํ…”๋ ˆ๊ทธ๋žจ @abcd1234๋กœ ์—ฐ๋ฝ์ฃผ์‹œ๋ฉด ๋ฐ˜๊ฐ’์— ๋“œ๋ฆฝ๋‹ˆ๋‹ค. ๋„๋งค๊ฐ€๋กœ ํŒ๋งค์ค‘!"],
        ["๋ฐฐ์†ก์ด ์ƒ๊ฐ๋ณด๋‹ค ๋นจ๋ผ์„œ ์ข‹์•˜์–ด์š”. ํ’ˆ์งˆ๋„ ๊ดœ์ฐฎ๊ณ  ๊ฐ€๊ฒฉ๋Œ€๋น„ ๋งŒ์กฑํ•ฉ๋‹ˆ๋‹ค."],
        ["์‚ฌ์ด์ฆˆ๊ฐ€ ๋„ˆ๋ฌด ์ž‘์•„์š”. ๊ตํ™˜ํ•˜๋ ค๊ณ  ํ–ˆ๋Š”๋ฐ ์ ˆ์ฐจ๊ฐ€ ๋ณต์žกํ•˜๋„ค์š”."],
        ["๋””์ž์ธ์€ ์˜ˆ์œ๋ฐ ํ’ˆ์งˆ์ด ๊ฐ€๊ฒฉ์— ๋น„ํ•ด ๋ณ„๋กœ์ž…๋‹ˆ๋‹ค. ๊ทธ๋ƒฅ์ €๋ƒฅ์ด์—์š”."],
        ["์„ธํŠธ ๊ฐ€๊ฒฉ ๊ฐ€์„ฑ๋น„ ์ตœ๊ณ ์˜ˆ์šฉโค๏ธ๐Ÿคใ€€๋”ฐ๋œปํ•˜๊ณ  ํญ๋‹ฅํญ๋‹ฅํ•œ ๋А๋‚Œ ๋„ˆ๋ฌด ์กฐ์•„์—ฌ!! ํ• ๋„ˆ๋ฌด ์˜ˆ๋ป์šฉ!!!"]
    ]
    # Build the interface: dashboard-style layout with a custom theme and CSS.
    # The CSS and Markdown literals are kept flush left so their contents
    # stay exactly as they are.
    with gr.Blocks(
        title="๋ฆฌ๋ทฐ 3๋‹จ๊ณ„ ๋ถ„์„ ์„œ๋น„์Šค",
        theme=gr.themes.Default(
            primary_hue="blue",
            secondary_hue="slate",
            neutral_hue="slate",
            font=gr.themes.GoogleFont("Noto Sans KR")
        ),
        css="""
.card-header {
font-size: 1.2em;
font-weight: bold;
margin-bottom: 10px;
padding: 10px;
border-radius: 8px;
text-align: center;
}
.sentiment-positive { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; }
.sentiment-neutral { background: #6b7280; color: white; }
.sentiment-negative { background: linear-gradient(135deg, #fa709a 0%, #fee140 100%); color: white; }
.metric-card {
border: 2px solid #e5e7eb;
border-radius: 12px;
padding: 10px;
background: white;
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}
.big-emoji { font-size: 3em; text-align: center; margin: 10px 0; }
.big-text { font-size: 1.8em; font-weight: bold; text-align: center; margin: 5px 0; }
.confidence { font-size: 1.2em; color: #6b7280; text-align: center; }
/* Label ์ปดํฌ๋„ŒํŠธ ํŒจ๋”ฉ ์กฐ์ • - ๊ฐ ํ™•๋ฅ  ํ•ญ๋ชฉ๋“ค์˜ ํŒจ๋”ฉ ์ค„์ด๊ธฐ */
.label .output-class { padding: 6px 12px !important; }
.label-wrap .output-class { padding: 6px 12px !important; }
.compact-label .output-class { padding: 6px 12px !important; }
"""
    ) as demo:
        # Page header.
        gr.Markdown("""
# ๐Ÿ” ๋ฆฌ๋ทฐ ๋ถ„์„ ๋Œ€์‹œ๋ณด๋“œ
AI ๊ธฐ๋ฐ˜ 3๋‹จ๊ณ„ ๋ถ„์„์œผ๋กœ ๋ฆฌ๋ทฐ๋ฅผ ์ž๋™์œผ๋กœ ๊ฒ€์ˆ˜ํ•˜๊ณ  ์ธ์‚ฌ์ดํŠธ๋ฅผ ์ถ”์ถœํ•ฉ๋‹ˆ๋‹ค.
""")
        # Two-column layout: review input / analysis results.
        with gr.Row():
            # Left column: input section.
            with gr.Column(scale=1):
                gr.Markdown("## ๋ฆฌ๋ทฐ ์ž…๋ ฅ")
                review_input = gr.Textbox(
                    label="TextBox",
                    placeholder="๋ถ„์„ํ•  ๋ฆฌ๋ทฐ ๋‚ด์šฉ์„ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”...",
                    lines=10,
                    max_lines=20
                )
                submit_btn = gr.Button("๐Ÿ” ๋ถ„์„ ์‹œ์ž‘", variant="primary", size="lg")
                gr.Examples(
                    examples=examples,
                    inputs=review_input,
                    label="๐Ÿ’ก ์˜ˆ์‹œ ๋ฆฌ๋ทฐ"
                )
            # Right column: the three analysis stages.
            with gr.Column(scale=1):
                gr.Markdown("## ๋ถ„์„ ๊ฒฐ๊ณผ")
                # Stage 1: sentiment analysis.
                gr.HTML('<div class="card-header sentiment-neutral">1. ๊ฐ์ • ๋ถ„์„</div>')
                with gr.Group(elem_classes="metric-card"):
                    # Text result; created with visible=False, so only the
                    # Label below is shown.
                    sentiment_output = gr.Textbox(
                        label="",
                        lines=1,
                        interactive=False,
                        show_label=False,
                        container=False,
                        elem_classes="big-text",
                        visible=False
                    )
                    sentiment_prob = gr.Label(
                        label="ํ™•๋ฅ  ๋ถ„ํฌ",
                        num_top_classes=3,
                        show_label=False,
                        elem_classes="compact-label"
                    )
                # Stage 2: category analysis.
                gr.HTML('<div class="card-header sentiment-neutral">2. ์นดํ…Œ๊ณ ๋ฆฌ ๋ถ„์„</div>')
                with gr.Group(elem_classes="metric-card"):
                    # Text result; created with visible=False.
                    category_output = gr.Textbox(
                        label="",
                        lines=4,
                        interactive=False,
                        show_label=False,
                        container=False,
                        visible=False
                    )
                    category_prob = gr.Label(
                        label="ํ™•๋ฅ  ๋ถ„ํฌ",
                        num_top_classes=5,
                        show_label=False,
                        elem_classes="compact-label"
                    )
                # Stage 3: tone detection.
                gr.HTML('<div class="card-header sentiment-neutral">3. ๋ฆฌ๋ทฐ ํ†ค ํƒ์ง€</div>')
                with gr.Group(elem_classes="metric-card"):
                    # Text result; created with visible=False.
                    tone_output = gr.Textbox(
                        label="",
                        lines=1,
                        interactive=False,
                        show_label=False,
                        container=False,
                        elem_classes="big-text",
                        visible=False
                    )
                    tone_prob = gr.Label(
                        label="ํ™•๋ฅ  ๋ถ„ํฌ",
                        num_top_classes=3,
                        show_label=False,
                        elem_classes="compact-label"
                    )
        # Event handlers: the button click and the textbox submit event run
        # the same analysis and fill the same six outputs, in the order of
        # the tuple returned by analyze_for_gradio.
        submit_btn.click(
            fn=review_analyzer.analyze_for_gradio,
            inputs=review_input,
            outputs=[sentiment_output, category_output, tone_output,
                     sentiment_prob, category_prob, tone_prob]
        )
        review_input.submit(
            fn=review_analyzer.analyze_for_gradio,
            inputs=review_input,
            outputs=[sentiment_output, category_output, tone_output,
                     sentiment_prob, category_prob, tone_prob]
        )
    return demo
def main():
    """Print a start banner, build the Gradio app and launch it.

    The app is launched on host ``0.0.0.0`` and port 7860, without a
    public share link, and with ``inbrowser`` enabled.
    """
    print("๋ฆฌ๋ทฐ ์ž๋™ ๊ฒ€์ˆ˜ ์„œ๋น„์Šค ์‹œ์ž‘")
    print("-" * 80)

    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "inbrowser": True,
    }
    create_gradio_app().launch(**launch_options)
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()