Spaces:
Sleeping
Sleeping
# -*- coding: utf-8 -*-
"""
๋ฆฌ๋ทฐ ์๋ ๊ฒ์ ์๋น์ค
Hugging Face์ Zero-Shot Classification ๋ชจ๋ธ์ ์ฌ์ฉํ์ฌ ๋ฆฌ๋ทฐ๋ฅผ 3๋จ๊ณ๋ก ๋ถ์ํฉ๋๋ค.
๋ถ์ ๋จ๊ณ:
1. ๊ฐ์ ๋ถ์: ๊ธ์ / ์ค๋ฆฝ / ๋ถ์ 
2. ์นดํ ๊ณ ๋ฆฌ ๋ถ์: ๋ฐฐ์ก / ํ์ง / ์ฌ์ด์ฆ / ๊ตํ / ์๋น์ค ๋ฑ
3. ๋ฆฌ๋ทฐ ํค ํ์ง: ๋ถ๋ง / ์์ค / ํ์ํ๊ธฐ / ๊ด๊ณ ๋ฑ
"""
import csv
import json
import re
from datetime import datetime
from typing import Dict, List, Tuple

import gradio as gr
from transformers import pipeline
class ReviewAnalyzer:
    """Three-stage zero-shot review analyzer.

    Stage 1 - sentiment: positive / neutral / negative
    Stage 2 - category: delivery / quality / size / exchange / service, etc.
    Stage 3 - tone: honest review / complaint / advertisement (spam)

    The zero-shot labels are verbose Korean hypothesis sentences; the short
    display labels callers see are derived from them via the ``*_mapping``
    dicts built in ``__init__``.
    """

    # Rule-based category -> keyword table used for evidence extraction and
    # per-category sentiment. Hoisted to a class constant because the exact
    # same dict was previously duplicated in two methods.
    _CATEGORY_KEYWORDS = {
        "๋ฐฐ์ก": ["๋ฐฐ์ก", "ํ๋ฐฐ", "๋์ฐฉ", "ํฌ์ฅ"],
        "ํ์ง/๋์์ธ": ["ํ์ง", "์ฌ์ง", "ํผํผ", "๋ด๊ตฌ", "์์ฑ๋", "ํธ๋น ์ง", "๋น ์ง", "๋์์ธ", "์์", "์คํ์ผ", "์ธ๊ด"],
        "์ฌ์ด์ฆ": ["์ฌ์ด์ฆ", "ํฌ๊ธฐ", "ํ", "์น์"],
        "๊ตํ/ํ๋ถ": ["๊ตํ", "ํ๋ถ", "๋ฐํ"],
        "์๋น์ค": ["์๋น์ค", "๊ณ ๊ฐ์ผํฐ", "์๋", "์น์ "],
        "๊ฐ๊ฒฉ": ["๊ฐ๊ฒฉ", "๊ฐ์ฑ๋น", "๋น์ธ", "์ ๋ ด", "ํ ์ธ", "๋"],
        "๊ธฐ๋ฅ/์ฑ๋ฅ": ["๊ธฐ๋ฅ", "์ฑ๋ฅ", "์๋", "ํจ๊ณผ", "์ฌ์ฉ"]
    }

    # Explicit positive cue words (Korean). The identical list was previously
    # repeated in three methods. The negative cue lists intentionally differ
    # per method, so they remain local to each method.
    _POSITIVE_KEYWORDS = ["์ข", "ํ๋ฅญ", "๋ง์กฑ", "์ต๊ณ ", "์์", "์ด์", "๋ฑ๋ง", "๋น ๋ฅด", "๊ด์ฐฎ", "์๋ฒฝ", "๋ฉ์ง", "๊ฐ์ฌ"]

    def __init__(self):
        """Load the zero-shot classification pipeline and the label tables."""
        print("๋ชจ๋ธ ๋ก๋ฉ ์ค...")
        # Multilingual NLI model chosen because it handles Korean text.
        self.classifier = pipeline(
            "zero-shot-classification",
            model="MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7"
        )

        # Each pair = (verbose hypothesis string fed to the model, short label
        # reported to callers). Building both the category list and the
        # reverse mapping from ONE pairs list keeps them in sync; previously
        # every long string was written twice and a single-character
        # divergence would have caused a KeyError at classification time.

        # Stage 1: sentiment labels (improved prompts with concrete examples).
        sentiment_pairs = [
            ("์ด ๋ฆฌ๋ทฐ๋ ์ ํ์ด๋ ์๋น์ค์ ๋ง์กฑํ๋ฉฐ ์ข์ํ๊ณ ์ถ์ฒํ๋ ๊ธ์ ์ ์ธ ๊ฐ์ ์ ํํํฉ๋๋ค. ์: ์ข์์, ๋ง์กฑ, ์ถ์ฒ, ํ๋ฅญ, ์ต๊ณ , ๊ฐ์ฌ, ๋ง์์ ๋ค์ด์", "๊ธ์ "),
            ("์ด ๋ฆฌ๋ทฐ๋ ์ ํ์ด๋ ์๋น์ค์ ๋ํด ์ค๋ฆฝ์ ์ด๊ณ ๊ฐ๊ด์ ์ผ๋ก ์ฌ์ค์ด๋ ์ํ๋ง์ ๋์ดํ๋ฉฐ ํน๋ณํ ๊ฐ์ ํํ์ด ์์ต๋๋ค. ์: ๊ทธ๋ฅ ๊ทธ๋์, ๋ณดํต, ๋ฌด๋, ํ๋ฒ", "์ค๋ฆฝ"),
            ("์ด ๋ฆฌ๋ทฐ๋ ์ ํ์ด๋ ์๋น์ค์ ์ค๋งํ๊ณ ๋ถ๋ง์กฑ์ค๋ฌ์ด ๋ถ์ ์ ์ธ ๊ฐ์ ์ ํํํฉ๋๋ค. ์: ๋ณ๋ก, ์ค๋ง, ๋ถ๋ง์กฑ, ์ต์ , ํ๋จ, ํํ, ํ๋ถ", "๋ถ์ "),
        ]
        self.sentiment_categories = [text for text, _ in sentiment_pairs]
        self.sentiment_mapping = dict(sentiment_pairs)

        # Stage 2: topic/category labels.
        topic_pairs = [
            ("์ด ๋ฆฌ๋ทฐ๋ ๋ฐฐ์ก๊ณผ ๊ด๋ จ๋ ๋ด์ฉ์ ์ธ๊ธํฉ๋๋ค. ์: ๋ฐฐ์ก ๋น ๋ฆ, ๋ฐฐ์ก ๋ฆ์, ํฌ์ฅ ์ํ, ํ๋ฐฐ, ๋์ฐฉ, ํ์", "๋ฐฐ์ก"),
            ("์ด ๋ฆฌ๋ทฐ๋ ์ ํ ํ์ง ๋๋ ๋์์ธ๊ณผ ๊ด๋ จ๋ ๋ด์ฉ์ ์ธ๊ธํฉ๋๋ค. ์: ์ฌ์ง, ๋ด๊ตฌ์ฑ, ์์ฑ๋, ํ์ง ์ข์, ํ์ง ๋์จ, ํผํผ, ์ฝํจ, ๋์์ธ, ์์, ์ธ๊ด, ์์จ, ์คํ์ผ, ๋ชจ์, ์๊น", "ํ์ง/๋์์ธ"),
            ("์ด ๋ฆฌ๋ทฐ๋ ์ ํ ์ฌ์ด์ฆ์ ๊ด๋ จ๋ ๋ด์ฉ์ ์ธ๊ธํฉ๋๋ค. ์: ํฌ๊ธฐ, ์ฌ์ด์ฆ, ํ, ์์, ํผ, ๋ฑ ๋ง์, ์น์", "์ฌ์ด์ฆ"),
            ("์ด ๋ฆฌ๋ทฐ๋ ๊ตํ/ํ๋ถ๊ณผ ๊ด๋ จ๋ ๋ด์ฉ์ ์ธ๊ธํฉ๋๋ค. ์: ๊ตํ, ํ๋ถ, ๋ฐํ, ํ๋ถ ์ ์ฒญ, ๊ตํ ์ ์ฐจ", "๊ตํ/ํ๋ถ"),
            ("์ด ๋ฆฌ๋ทฐ๋ ๊ณ ๊ฐ ์๋น์ค์ ๊ด๋ จ๋ ๋ด์ฉ์ ์ธ๊ธํฉ๋๋ค. ์: ๊ณ ๊ฐ์ผํฐ, ์๋, ์๋ด, A/S, ์น์ , ๋ถ์น์ ", "์๋น์ค"),
            ("์ด ๋ฆฌ๋ทฐ๋ ๊ฐ๊ฒฉ๊ณผ ๊ด๋ จ๋ ๋ด์ฉ์ ์ธ๊ธํฉ๋๋ค. ์: ๊ฐ๊ฒฉ, ๊ฐ์ฑ๋น, ๋น์, ์ ๋ ด, ํ ์ธ, ๋น์ฉ, ๋", "๊ฐ๊ฒฉ"),
            ("์ด ๋ฆฌ๋ทฐ๋ ์ ํ ๊ธฐ๋ฅ/์ฑ๋ฅ๊ณผ ๊ด๋ จ๋ ๋ด์ฉ์ ์ธ๊ธํฉ๋๋ค. ์: ๊ธฐ๋ฅ, ์ฑ๋ฅ, ์๋, ํจ๊ณผ, ์ฌ์ฉ๊ฐ, ํธ๋ฆฌํจ", "๊ธฐ๋ฅ/์ฑ๋ฅ"),
        ]
        self.topic_categories = [text for text, _ in topic_pairs]
        self.topic_mapping = dict(topic_pairs)

        # Stage 3: tone labels (normal review / complaint / advertisement).
        tone_pairs = [
            ("์ด ๋ฆฌ๋ทฐ๋ ์ ํ์ ๋ํ ์์งํ ๊ฐ์๊ณผ ํ๊ฐ๋ฅผ ๋ด๊ณ ์์ผ๋ฉฐ, ๊ธ์ ์ ์ด๋ ๋ถ์ ์ ์ด๋ ์ง์ค๋ ์ฌ์ฉ ๊ฒฝํ์ ๊ณต์ ํฉ๋๋ค", "์ผ๋ฐ"),
            ("์ด ๋ฆฌ๋ทฐ๋ ์ ํ์ ๊ฒฐํจ, ๋ฐฐ์ก์ง์ฐ, ์๋น์ค ๋ฌธ์ ๋ฑ ๋ช ๋ฐฑํ ๋ถ๋ง์ฌํญ์ ์ธ๊ธํ๋ฉฐ ๋ถ์ ์ ์ธ ๊ฒฝํ์ ํํํฉ๋๋ค", "๋ถ๋ง"),
            ("์ด ๋ฆฌ๋ทฐ๋ ํ ๋ ๊ทธ๋จ, ์นด์นด์คํก ๋ฑ ๋ฉ์ ์ ์์ด๋(@๋ก ์์), ์ ํ๋ฒํธ, ์ด๋ฉ์ผ ๊ฐ์ ์ฐ๋ฝ์ฒ๋ฅผ ํฌํจํ๊ฑฐ๋, '์ฐ๋ฝ์ฃผ์ธ์', '๋๋งค๊ฐ', '๋ฐ๊ฐ', 'ํ ์ธ', '์ฟ ํฐ' ๋ฑ์ผ๋ก ๋ค๋ฅธ ํ๋งค์ฒ๋ ๊ฑฐ๋๋ฅผ ์ ๋ํ๋ ๋ช ๋ฐฑํ ๊ด๊ณ /์คํธ ๋ด์ฉ์ ๋๋ค", "๊ด๊ณ "),
        ]
        self.tone_categories = [text for text, _ in tone_pairs]
        self.tone_mapping = dict(tone_pairs)

        print("๋ชจ๋ธ ๋ก๋ฉ ์๋ฃ!")
        print("โ 3๋จ๊ณ ๋ถ์ ๋ชจ๋ ํ์ฑํ (๊ฐ์ โ ์นดํ ๊ณ ๋ฆฌ โ ํค)")

    def preprocess_text(self, text: str) -> str:
        """Normalize whitespace: strip the ends, collapse runs to one space.

        Args:
            text: Raw review text.
        Returns:
            Cleaned text.
        """
        return re.sub(r'\s+', ' ', text.strip())

    def split_into_sentences(self, text: str) -> List[str]:
        """Split text on sentence-ending punctuation (., !, ?, ~).

        Fragments of two characters or fewer are dropped as noise. If nothing
        survives the split, the original text is returned as a single item.

        Args:
            text: Raw review text.
        Returns:
            List of sentence strings (never empty).
        """
        parts = re.split(r'[.!?~]+\s*', text)
        parts = [p.strip() for p in parts if p.strip() and len(p.strip()) > 2]
        return parts if parts else [text]

    def analyze_sentiment(self, text: str, use_sentence_split: bool = True) -> Dict:
        """Stage 1: overall sentiment (positive / neutral / negative).

        For long texts (>100 chars) the review is optionally split into
        sentences and the per-label scores are averaged, which is more stable
        for mixed-sentiment reviews than a single pass.

        Args:
            text: Review text.
            use_sentence_split: Average per-sentence scores for long texts.
        Returns:
            Dict with ``sentiment`` (short label), ``confidence`` (percent),
            ``scores`` (percent per label) and ``method``.
        """
        if use_sentence_split and len(text) > 100:
            sentences = self.split_into_sentences(text)
            if len(sentences) > 1:
                # Collect per-sentence scores for every short label.
                all_scores = {cat: [] for cat in self.sentiment_mapping.values()}
                for sentence in sentences:
                    result = self.classifier(
                        sentence,
                        self.sentiment_categories,
                        multi_label=False
                    )
                    for label, score in zip(result['labels'], result['scores']):
                        all_scores[self.sentiment_mapping[label]].append(score)
                # Average across sentences, then pick the top label.
                avg_scores = {
                    cat: sum(scores) / len(scores) if scores else 0
                    for cat, scores in all_scores.items()
                }
                sentiment, confidence = max(avg_scores.items(), key=lambda x: x[1])
                return {
                    "sentiment": sentiment,
                    "confidence": round(confidence * 100, 2),
                    "scores": {cat: round(s * 100, 2) for cat, s in avg_scores.items()},
                    "method": "sentence_split"
                }
        # Single-pass classification (short text or a single sentence).
        result = self.classifier(
            text,
            self.sentiment_categories,
            multi_label=False
        )
        return {
            "sentiment": self.sentiment_mapping[result['labels'][0]],
            "confidence": round(result['scores'][0] * 100, 2),
            "scores": {
                self.sentiment_mapping[label]: round(score * 100, 2)
                for label, score in zip(result['labels'], result['scores'])
            },
            "method": "single"
        }

    def analyze_category(self, text: str, top_k: int = 3, use_sentence_split: bool = True, min_threshold: float = 0.25) -> Dict:
        """Stage 2: topic/category analysis (delivery / quality / size / ...).

        Args:
            text: Review text.
            top_k: Number of top categories to return (default 3).
            use_sentence_split: Analyze per sentence for long texts.
            min_threshold: Minimum score (0-1) for a category to be selected.
        Returns:
            Dict with ``main_categories`` (list of {category, confidence}),
            ``all_scores`` (percent per category) and ``method``.
        """
        if use_sentence_split and len(text) > 100:
            sentences = self.split_into_sentences(text)
            if len(sentences) > 1:
                # Accumulate per-sentence scores for each category.
                accumulated = {cat: [] for cat in self.topic_mapping.values()}
                for sentence in sentences:
                    result = self.classifier(
                        sentence,
                        self.topic_categories,
                        multi_label=True
                    )
                    for label, score in zip(result['labels'], result['scores']):
                        accumulated[self.topic_mapping[label]].append(score)
                # Aggregate with max(): one strongly-matching sentence is
                # enough to attribute the category to the whole review.
                max_scores = {
                    cat: max(scores) if scores else 0
                    for cat, scores in accumulated.items()
                }
                ranked = sorted(max_scores.items(), key=lambda x: x[1], reverse=True)
                main_categories = [
                    {"category": cat, "confidence": round(score * 100, 2)}
                    for cat, score in ranked[:top_k]
                    if score >= min_threshold
                ]
                return {
                    "main_categories": main_categories,
                    "all_scores": {cat: round(score * 100, 2) for cat, score in ranked},
                    "method": "sentence_split"
                }
        # Single-pass classification; multi_label=True because one review can
        # touch several categories at once.
        result = self.classifier(
            text,
            self.topic_categories,
            multi_label=True
        )
        main_categories = [
            {"category": self.topic_mapping[label], "confidence": round(score * 100, 2)}
            for label, score in list(zip(result['labels'], result['scores']))[:top_k]
            if score >= min_threshold
        ]
        return {
            "main_categories": main_categories,
            "all_scores": {
                self.topic_mapping[label]: round(score * 100, 2)
                for label, score in zip(result['labels'], result['scores'])
            },
            "method": "single"
        }

    def analyze_tone(self, text: str) -> Dict:
        """Stage 3: review tone detection (normal / complaint / ad-spam).

        Args:
            text: Review text.
        Returns:
            Dict with ``tone``, ``confidence`` (percent) and ``scores``.
        """
        result = self.classifier(
            text,
            self.tone_categories,
            multi_label=False
        )
        return {
            "tone": self.tone_mapping[result['labels'][0]],
            "confidence": round(result['scores'][0] * 100, 2),
            "scores": {
                self.tone_mapping[label]: round(score * 100, 2)
                for label, score in zip(result['labels'], result['scores'])
            }
        }

    def generate_rating_from_sentiment(self, category: str, confidence: float, sentiment: str) -> int:
        """Derive a 1-5 star rating from a sentiment label and confidence.

        The base score (positive 4.5 / neutral 3.0 / negative 1.5) is blended
        toward the midpoint 2.5 as confidence drops, then clamped to [1, 5].

        Args:
            category: Category name (currently unused; kept for interface
                compatibility with existing callers).
            confidence: Confidence in percent (0-100).
            sentiment: Sentiment label ("๊ธ์ "/"์ค๋ฆฝ"/"๋ถ์ ").
        Returns:
            Integer star rating between 1 and 5.
        """
        if sentiment == "๊ธ์ ":
            base_score = 4.5
        elif sentiment == "์ค๋ฆฝ":
            base_score = 3.0
        else:  # negative
            base_score = 1.5
        # Low confidence pulls the score toward the neutral midpoint (2.5).
        confidence_factor = confidence / 100.0
        final_score = base_score * confidence_factor + 2.5 * (1 - confidence_factor)
        final_score = max(1, min(5, final_score))
        return round(final_score)

    def extract_evidence_from_text(self, text: str, category: str, sentiment: str = None) -> str:
        """Extract a short quoted evidence fragment for a category.

        The review is split into clause-like chunks; chunks containing one of
        the category's keywords are candidates, and chunks whose cue words
        agree with *sentiment* are preferred.

        Args:
            text: Review text.
            category: Category name (key of ``_CATEGORY_KEYWORDS``).
            sentiment: Expected sentiment for this category
                ("๊ธ์ "/"๋ถ์ "/"์ค๋ฆฝ"); ``None`` disables the preference.
        Returns:
            Quoted fragment (max 20 chars) or "-" when nothing matches.
        """
        if category not in self._CATEGORY_KEYWORDS:
            return "-"
        category_keywords = self._CATEGORY_KEYWORDS[category]
        positive_keywords = self._POSITIVE_KEYWORDS
        # NOTE: this negative list differs slightly from the ones used by
        # analyze_sentiment_for_category / extract_tone_evidence (kept as-is).
        negative_keywords = ["๋ณ๋ก", "์์ฝ", "์ค๋ง", "์ต์ ", "์ง์ฆ", "๋ฌธ์ ", "๋์", "ํํธ์", "์๋ง", "ํํ", "๋ค๋ฅด", "์", "๋ชป", "๋ณต์ก"]

        # Split on commas and common Korean connectives.
        chunks = re.split(r'[,]|\s+๊ทธ๋ฆฌ๊ณ \s+|\s+๊ทผ๋ฐ\s+|\s+ํ์ง๋ง\s+|\s+์ธ๋ฐ\s+', text)
        matching_chunks = []
        for chunk in chunks:
            chunk = chunk.strip()
            # Candidate only if it contains a category keyword and is not
            # trivially short.
            if len(chunk) <= 5 or not any(kw in chunk for kw in category_keywords):
                continue
            if sentiment:
                chunk_lower = chunk.lower()
                has_positive = any(kw in chunk_lower for kw in positive_keywords)
                has_negative = any(kw in chunk_lower for kw in negative_keywords)
                if sentiment == "๊ธ์ " and has_positive and not has_negative:
                    matching_chunks.append((chunk, True))   # sentiment agrees
                elif sentiment == "๋ถ์ " and has_negative:
                    matching_chunks.append((chunk, True))   # sentiment agrees
                else:
                    matching_chunks.append((chunk, False))  # sentiment mismatch
            else:
                matching_chunks.append((chunk, True))
        # Prefer a sentiment-matching chunk; otherwise fall back to the first.
        for chunk, is_match in matching_chunks:
            if is_match:
                return f'"{chunk[:20]}"'
        if matching_chunks:
            return f'"{matching_chunks[0][0][:20]}"'
        return "-"

    def analyze_sentiment_for_category(self, text: str, category: str) -> str:
        """Rule-based sentiment for one category.

        Only the clause-like chunks that contain one of the category's
        keywords are inspected, so e.g. "๋ฐฐ์ก์ ๋น ๋ฅธ๋ฐ ํ์ง์ด ๋ณ๋ก์์" can
        yield different sentiments for delivery vs. quality.

        Args:
            text: Review text.
            category: Category name.
        Returns:
            "๊ธ์ ", "๋ถ์ " or "์ค๋ฆฝ" (default when nothing matches).
        """
        # Negative cues are checked first: a chunk containing both kinds of
        # cue words counts as negative.
        negative_keywords = ["๋ณ๋ก", "์์ฝ", "์ค๋ง", "์ต์ ", "์ง์ฆ", "๋ฌธ์ ", "๋์", "ํํธ์", "์๋ง", "ํํ", "๋ค๋ฅด", "์", "๋ชป"]
        positive_keywords = self._POSITIVE_KEYWORDS
        if category not in self._CATEGORY_KEYWORDS:
            return "์ค๋ฆฝ"
        category_keywords = self._CATEGORY_KEYWORDS[category]
        # Split on commas and connectives so each clause is judged alone.
        chunks = re.split(r'[,]|\s+๊ทธ๋ฆฌ๊ณ \s+|\s+๊ทผ๋ฐ\s+|\s+ํ์ง๋ง\s+|\s+์ธ๋ฐ\s+', text)
        for chunk in chunks:
            if not any(kw in chunk for kw in category_keywords):
                continue
            chunk_lower = chunk.lower()
            for neg_keyword in negative_keywords:
                if neg_keyword in chunk_lower:
                    return "๋ถ์ "
            for pos_keyword in positive_keywords:
                if pos_keyword in chunk_lower:
                    return "๊ธ์ "
        return "์ค๋ฆฝ"

    def extract_tone_evidence(self, text: str) -> Dict[str, str]:
        """Collect short quoted positive/negative evidence fragments.

        Args:
            text: Review text.
        Returns:
            Dict {"positive": "...", "negative": "..."}; each value is up to
            two quoted fragments joined by ", ", or "-" when none found.
        """
        positive_keywords = self._POSITIVE_KEYWORDS
        # Broader negative list than the per-category one (includes delay
        # words); kept exactly as before.
        negative_keywords = ["๋ณ๋ก", "์์ฝ", "์ค๋ง", "์ต์ ", "์ง์ฆ", "๋ฌธ์ ", "๋์", "ํํธ์", "์๋ง", "ํํ", "๋ค๋ฅด", "๋ณต์ก", "๋ถํธ", "๋๋ฆฌ", "๋๋ฆผ", "๋ฆ", "์ง์ฐ"]
        # Split on commas, periods and connectives.
        chunks = re.split(r'[,.]|\s+๊ทธ๋ฆฌ๊ณ \s+|\s+๊ทผ๋ฐ\s+|\s+ํ์ง๋ง\s+|\s+์ธ๋ฐ\s+', text)
        positive_evidence = []
        negative_evidence = []
        for chunk in chunks:
            chunk = chunk.strip()
            if len(chunk) < 3:
                continue
            chunk_lower = chunk.lower()
            if any(kw in chunk_lower for kw in positive_keywords):
                positive_evidence.append(f'"{chunk[:20]}"')
            if any(kw in chunk_lower for kw in negative_keywords):
                negative_evidence.append(f'"{chunk[:20]}"')
        # Show at most two fragments per side.
        return {
            "positive": ", ".join(positive_evidence[:2]) if positive_evidence else "-",
            "negative": ", ".join(negative_evidence[:2]) if negative_evidence else "-"
        }

    def generate_comprehensive_analysis(self, review_text: str, analysis_result: Dict) -> Dict:
        """Build the comprehensive report: per-item ratings plus a summary.

        Args:
            review_text: Original review text.
            analysis_result: Result of the three-stage analysis
                (``analyze_review`` output without the comprehensive part).
        Returns:
            Dict with ``item_ratings``, ``tone_ratio``, ``tone_evidence``,
            ``summary`` and ``overall_sentiment``.
        """
        sentiment = analysis_result['sentiment']['sentiment']
        sentiment_scores = analysis_result['sentiment']['scores']

        # Inspect every known category regardless of the model's top-k pick;
        # the rule-based evidence check decides which items appear.
        all_possible_categories = ["๋ฐฐ์ก", "ํ์ง/๋์์ธ", "์ฌ์ด์ฆ", "๊ตํ/ํ๋ถ", "์๋น์ค", "๊ฐ๊ฒฉ", "๊ธฐ๋ฅ/์ฑ๋ฅ"]
        item_ratings = []
        for category in all_possible_categories:
            # Per-category sentiment first, so evidence can be matched to it.
            category_sentiment = self.analyze_sentiment_for_category(review_text, category)
            evidence = self.extract_evidence_from_text(review_text, category, category_sentiment)
            if evidence == "-":
                # No textual evidence -> category not mentioned; skip.
                continue
            if category_sentiment == "๋ถ์ ":
                rating = 2
            elif category_sentiment == "๊ธ์ ":
                rating = 5
            else:
                rating = 3
            item_ratings.append({
                "category": category,
                "rating": rating,
                "evidence": evidence,
                "sentiment": category_sentiment
            })

        tone_evidence = self.extract_tone_evidence(review_text)
        summary = self.generate_summary_sentence(review_text, item_ratings, sentiment)
        return {
            "item_ratings": item_ratings,
            "tone_ratio": {
                "positive": round(sentiment_scores.get('๊ธ์ ', 0)),
                "negative": round(sentiment_scores.get('๋ถ์ ', 0)),
                "neutral": round(sentiment_scores.get('์ค๋ฆฝ', 0))
            },
            "tone_evidence": tone_evidence,
            "summary": summary,
            "overall_sentiment": sentiment
        }

    def generate_summary_sentence(self, review_text: str, item_ratings: List[Dict], sentiment: str) -> str:
        """Compose a one-line Korean summary from the per-item ratings.

        Args:
            review_text: Original review (unused; kept for interface
                compatibility).
            item_ratings: Per-category ratings from the comprehensive pass.
            sentiment: Overall sentiment label, used as a fallback.
        Returns:
            Summary sentence.
        """
        high_rated = [item for item in item_ratings if item['rating'] >= 4]
        low_rated = [item for item in item_ratings if item['rating'] <= 2]
        if high_rated and low_rated:
            # Mixed review: mention both strengths and weaknesses.
            high_cats = ", ".join([item['category'] for item in high_rated[:2]])
            low_cats = ", ".join([item['category'] for item in low_rated[:2]])
            return f"{high_cats}์(๋) ์ข์ง๋ง, {low_cats} ๋ถ๋ถ์ด ์์ฌ์ด ์ ํ์ด์์."
        elif high_rated:
            high_cats = ", ".join([item['category'] for item in high_rated[:3]])
            return f"{high_cats} ๋ชจ๋ ๋ง์กฑ์ค๋ฌ์ด ์ ํ์ด์์."
        elif low_rated:
            low_cats = ", ".join([item['category'] for item in low_rated[:3]])
            return f"{low_cats} ๋ถ๋ถ์ด ๊ธฐ๋์ ๋ชป ๋ฏธ์น๋ ์ ํ์ด์์."
        else:
            # No rated items; fall back to the overall sentiment.
            if sentiment == "๊ธ์ ":
                return "์ ๋ฐ์ ์ผ๋ก ๋ง์กฑ์ค๋ฌ์ด ์ ํ์ด์์."
            elif sentiment == "๋ถ์ ":
                return "์ ๋ฐ์ ์ผ๋ก ์์ฌ์์ด ๋จ๋ ์ ํ์ด์์."
            else:
                return "๋ฌด๋ํ ์์ค์ ์ ํ์ด์์."

    def analyze_review(self, review_text: str, include_comprehensive: bool = True) -> Dict:
        """Run the full three-stage analysis on a single review.

        Args:
            review_text: Review text to analyze.
            include_comprehensive: Also build the comprehensive report.
        Returns:
            Dict with ``review``, ``sentiment``, ``categories``, ``tone``,
            ``timestamp`` and optionally ``comprehensive``.
        """
        processed_text = self.preprocess_text(review_text)
        sentiment_result = self.analyze_sentiment(processed_text)      # stage 1
        category_result = self.analyze_category(processed_text)       # stage 2
        tone_result = self.analyze_tone(processed_text)               # stage 3
        result = {
            "review": review_text,
            "sentiment": sentiment_result,
            "categories": category_result,
            "tone": tone_result,
            "timestamp": datetime.now().isoformat()
        }
        if include_comprehensive:
            result["comprehensive"] = self.generate_comprehensive_analysis(review_text, result)
        return result

    def analyze_reviews(self, reviews: List[str]) -> List[Dict]:
        """Analyze a batch of reviews sequentially.

        Args:
            reviews: List of review texts.
        Returns:
            List of per-review analysis dicts.
        """
        results = []
        for idx, review in enumerate(reviews, 1):
            print(f"\n[{idx}/{len(reviews)}] ๋ถ์ ์ค...")
            results.append(self.analyze_review(review))
        return results

    def print_results(self, results: List[Dict]):
        """Pretty-print a list of analysis results to stdout."""
        print("\n" + "=" * 80)
        print("๋ฆฌ๋ทฐ 3๋จ๊ณ ๋ถ์ ๊ฒฐ๊ณผ")
        print("=" * 80)
        for idx, result in enumerate(results, 1):
            print(f"\n[๋ฆฌ๋ทฐ #{idx}]")
            print(f"๋ด์ฉ: {result['review']}")
            print(f"\n1๏ธโฃ ๊ฐ์ : {result['sentiment']['sentiment']} ({result['sentiment']['confidence']}%)")
            categories_str = ', '.join([f"{c['category']} ({c['confidence']}%)" for c in result['categories']['main_categories']])
            print(f"2๏ธโฃ ์นดํ ๊ณ ๋ฆฌ: {categories_str}")
            print(f"3๏ธโฃ ํค: {result['tone']['tone']} ({result['tone']['confidence']}%)")
        print("\n" + "=" * 80)

    def save_results(self, results: List[Dict], filename: str = "review_results.json"):
        """Write analysis results to *filename* as UTF-8 JSON."""
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)
        # BUG FIX: the message previously printed a literal placeholder
        # instead of the actual target filename.
        print(f"\n๊ฒฐ๊ณผ๊ฐ {filename}์ ์ ์ฅ๋์์ต๋๋ค.")

    def load_reviews_from_csv(self, csv_file: str) -> List[str]:
        """Load review texts from a CSV file.

        Args:
            csv_file: Path to a CSV file with a ``review_text`` column.
        Returns:
            List of review texts.
        """
        # ``csv`` is imported at module level (it was previously missing,
        # which made this method raise NameError at call time).
        with open(csv_file, 'r', encoding='utf-8') as f:
            return [row['review_text'] for row in csv.DictReader(f)]

    def analyze_for_gradio(self, review_text: str):
        """Analyze one review and shape the output for the Gradio UI.

        Args:
            review_text: Review text from the textbox.
        Returns:
            Tuple of (sentiment text, category text, tone text,
            sentiment probs, category probs, tone probs) — the dicts feed
            ``gr.Label`` components and use 0-1 probabilities.
        """
        if not review_text or review_text.strip() == "":
            return "โ ๏ธ ๋ฆฌ๋ทฐ๋ฅผ ์ ๋ ฅํด์ฃผ์ธ์", "", "", {}, {}, {}
        result = self.analyze_review(review_text, include_comprehensive=False)

        sentiment_output = f"{result['sentiment']['sentiment']} ({result['sentiment']['confidence']}%)"

        categories = result['categories']['main_categories']
        if categories:
            category_output = "\n".join([f"โข {c['category']}" for c in categories])
        else:
            category_output = "ํด๋น ์นดํ ๊ณ ๋ฆฌ ์์"

        tone_output = f"{result['tone']['tone']} ({result['tone']['confidence']}%)"

        # gr.Label expects probabilities in [0, 1]; scores are percentages.
        sentiment_probs = {k: v / 100.0 for k, v in result['sentiment']['scores'].items()}
        category_probs = {k: v / 100.0 for k, v in result['categories']['all_scores'].items()}
        tone_probs = {k: v / 100.0 for k, v in result['tone']['scores'].items()}
        return sentiment_output, category_output, tone_output, sentiment_probs, category_probs, tone_probs

    def format_comprehensive_analysis(self, comprehensive: Dict) -> str:
        """Render the comprehensive report as a markdown table.

        Args:
            comprehensive: Output of ``generate_comprehensive_analysis``.
        Returns:
            Markdown table string.
        """
        output = "| ํญ๋ชฉ | ๊ฐ์ | ๋ง์กฑ๋ | ๊ทผ๊ฑฐ |\n"
        output += "|------|------|--------|------|\n"
        for item in comprehensive['item_ratings']:
            stars = "โญ๏ธ" * item['rating']
            sentiment = item.get('sentiment', '์ค๋ฆฝ')
            output += f"| {item['category']} | {sentiment} | {stars} | {item['evidence']} |\n"

        tone_ratio = comprehensive['tone_ratio']
        tone_evidence = comprehensive.get('tone_evidence', {"positive": "-", "negative": "-"})
        # A >20-point gap decides which tone dominates.
        if tone_ratio['positive'] > tone_ratio['negative'] + 20:
            tone_summary = "๊ธ์ ์ด ์ฐ์ธํจ"
        elif tone_ratio['negative'] > tone_ratio['positive'] + 20:
            tone_summary = "๋ถ์ ์ด ์ฐ์ธํจ"
        else:
            tone_summary = "๊ธ์ ๊ณผ ๋ถ์ ์ด ํผ์ฌ๋จ"
        tone_evidence_text = f"๊ธ์ : {tone_evidence['positive']} / ๋ถ์ : {tone_evidence['negative']}"
        output += f"| ์ ์ฒด ํค | {tone_summary} | ๊ธ์ {tone_ratio['positive']} : ๋ถ์ {tone_ratio['negative']} | {tone_evidence_text} |\n"
        return output
# Process-wide analyzer instance; the model is loaded only once, when the
# Gradio app first needs it.
analyzer = None


def get_analyzer():
    """Lazily create and return the shared ReviewAnalyzer singleton."""
    global analyzer
    if analyzer is not None:
        return analyzer
    analyzer = ReviewAnalyzer()
    return analyzer
def create_gradio_app():
    """Build and return the Gradio Blocks dashboard for review analysis."""
    # Initialize (or reuse) the shared analyzer.
    review_analyzer = get_analyzer()

    # Sample reviews shown under the input box.
    examples = [
        ["์ ๋ง ์ข์ ์ ํ์ด์์! ๋ฐฐ์ก๋ ๋น ๋ฅด๊ณ ํ์ง๋ ํ๋ฅญํฉ๋๋ค. ๋ค์์๋ ๋ ๊ตฌ๋งคํ ๊ฒ์!"],
        ["์์ ์ค๋ง์ด์์. ์ฌ์ง์ด๋ ์์ ๋ค๋ฅด๊ณ ํ์ง๋ ๋ณ๋ก์ ๋๋ค. ํ๋ถ ์ ์ฒญํ์ต๋๋ค. ๋ค๋ง ํ๋ถ ์ฒ๋ฆฌ๋ ๋นจ๋ผ์ ์ข์์ด์."],
        ["ํ๋ ๋์ด์๊ณ ์ฌ์ด์ฆ๋ ๋ฑ๋ง๊ณ ๋ค์ข์๋ฐ ํธ๋น ์ง์ด ์ฅ๋์ด ์๋์์~~๊ฐ์ํ ๋งํ๋ฐ ์๊ทผ ์ง์ฆ๋ ์๋? ๊ทธ๋ฅ ์ ์ผ๋ฉด ๊ณ ์์ด๋ง๋ฅ ํธ์ ๋ฟ๋ด์ ใ ใ "],
        ["ํ ๋ ๊ทธ๋จ @abcd1234๋ก ์ฐ๋ฝ์ฃผ์๋ฉด ๋ฐ๊ฐ์ ๋๋ฆฝ๋๋ค. ๋๋งค๊ฐ๋ก ํ๋งค์ค!"],
        ["๋ฐฐ์ก์ด ์๊ฐ๋ณด๋ค ๋นจ๋ผ์ ์ข์์ด์. ํ์ง๋ ๊ด์ฐฎ๊ณ ๊ฐ๊ฒฉ๋๋น ๋ง์กฑํฉ๋๋ค."],
        ["์ฌ์ด์ฆ๊ฐ ๋๋ฌด ์์์. ๊ตํํ๋ ค๊ณ ํ๋๋ฐ ์ ์ฐจ๊ฐ ๋ณต์กํ๋ค์."],
        ["๋์์ธ์ ์์๋ฐ ํ์ง์ด ๊ฐ๊ฒฉ์ ๋นํด ๋ณ๋ก์ ๋๋ค. ๊ทธ๋ฅ์ ๋ฅ์ด์์."],
        ["์ธํธ ๊ฐ๊ฒฉ ๊ฐ์ฑ๋น ์ต๊ณ ์์ฉโค๏ธ๐คใ๋ฐ๋ปํ๊ณ ํญ๋ฅํญ๋ฅํ ๋๋ ๋๋ฌด ์กฐ์์ฌ!! ํ ๋๋ฌด ์๋ป์ฉ!!!"]
    ]

    # Two-column dashboard layout built with gr.Blocks.
    with gr.Blocks(
        title="๋ฆฌ๋ทฐ 3๋จ๊ณ ๋ถ์ ์๋น์ค",
        theme=gr.themes.Default(
            primary_hue="blue",
            secondary_hue="slate",
            neutral_hue="slate",
            font=gr.themes.GoogleFont("Noto Sans KR")
        ),
        css="""
    .card-header {
        font-size: 1.2em;
        font-weight: bold;
        margin-bottom: 10px;
        padding: 10px;
        border-radius: 8px;
        text-align: center;
    }
    .sentiment-positive { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; }
    .sentiment-neutral { background: #6b7280; color: white; }
    .sentiment-negative { background: linear-gradient(135deg, #fa709a 0%, #fee140 100%); color: white; }
    .metric-card {
        border: 2px solid #e5e7eb;
        border-radius: 12px;
        padding: 10px;
        background: white;
        box-shadow: 0 2px 8px rgba(0,0,0,0.1);
    }
    .big-emoji { font-size: 3em; text-align: center; margin: 10px 0; }
    .big-text { font-size: 1.8em; font-weight: bold; text-align: center; margin: 5px 0; }
    .confidence { font-size: 1.2em; color: #6b7280; text-align: center; }
    /* Reduce padding of each probability row inside Label components */
    .label .output-class { padding: 6px 12px !important; }
    .label-wrap .output-class { padding: 6px 12px !important; }
    .compact-label .output-class { padding: 6px 12px !important; }
    """
    ) as demo:
        # Page header.
        gr.Markdown("""
    # ๐ ๋ฆฌ๋ทฐ ๋ถ์ ๋์๋ณด๋
    AI ๊ธฐ๋ฐ 3๋จ๊ณ ๋ถ์์ผ๋ก ๋ฆฌ๋ทฐ๋ฅผ ์๋์ผ๋ก ๊ฒ์ํ๊ณ ์ธ์ฌ์ดํธ๋ฅผ ์ถ์ถํฉ๋๋ค.
    """)

        with gr.Row():
            # Left column: review input.
            with gr.Column(scale=1):
                gr.Markdown("## ๋ฆฌ๋ทฐ ์ ๋ ฅ")
                review_input = gr.Textbox(
                    label="TextBox",
                    placeholder="๋ถ์ํ ๋ฆฌ๋ทฐ ๋ด์ฉ์ ์ ๋ ฅํด์ฃผ์ธ์...",
                    lines=10,
                    max_lines=20
                )
                submit_btn = gr.Button("๐ ๋ถ์ ์์", variant="primary", size="lg")
                gr.Examples(
                    examples=examples,
                    inputs=review_input,
                    label="๐ก ์์ ๋ฆฌ๋ทฐ"
                )

            # Right column: three analysis cards.
            with gr.Column(scale=1):
                gr.Markdown("## ๋ถ์ ๊ฒฐ๊ณผ")

                # Stage 1: sentiment.
                gr.HTML('<div class="card-header sentiment-neutral">1. ๊ฐ์ ๋ถ์</div>')
                with gr.Group(elem_classes="metric-card"):
                    sentiment_output = gr.Textbox(
                        label="",
                        lines=1,
                        interactive=False,
                        show_label=False,
                        container=False,
                        elem_classes="big-text",
                        visible=False
                    )
                    sentiment_prob = gr.Label(
                        label="ํ๋ฅ ๋ถํฌ",
                        num_top_classes=3,
                        show_label=False,
                        elem_classes="compact-label"
                    )

                # Stage 2: categories.
                gr.HTML('<div class="card-header sentiment-neutral">2. ์นดํ ๊ณ ๋ฆฌ ๋ถ์</div>')
                with gr.Group(elem_classes="metric-card"):
                    category_output = gr.Textbox(
                        label="",
                        lines=4,
                        interactive=False,
                        show_label=False,
                        container=False,
                        visible=False
                    )
                    category_prob = gr.Label(
                        label="ํ๋ฅ ๋ถํฌ",
                        num_top_classes=5,
                        show_label=False,
                        elem_classes="compact-label"
                    )

                # Stage 3: tone.
                gr.HTML('<div class="card-header sentiment-neutral">3. ๋ฆฌ๋ทฐ ํค ํ์ง</div>')
                with gr.Group(elem_classes="metric-card"):
                    tone_output = gr.Textbox(
                        label="",
                        lines=1,
                        interactive=False,
                        show_label=False,
                        container=False,
                        elem_classes="big-text",
                        visible=False
                    )
                    tone_prob = gr.Label(
                        label="ํ๋ฅ ๋ถํฌ",
                        num_top_classes=3,
                        show_label=False,
                        elem_classes="compact-label"
                    )

        # Wire both the button click and textbox submit to the same handler
        # and output set (previously duplicated verbatim).
        result_outputs = [sentiment_output, category_output, tone_output,
                          sentiment_prob, category_prob, tone_prob]
        submit_btn.click(
            fn=review_analyzer.analyze_for_gradio,
            inputs=review_input,
            outputs=result_outputs
        )
        review_input.submit(
            fn=review_analyzer.analyze_for_gradio,
            inputs=review_input,
            outputs=result_outputs
        )
    return demo
def main():
    """Entry point: build the Gradio dashboard and serve it on port 7860."""
    print("๋ฆฌ๋ทฐ ์๋ ๊ฒ์ ์๋น์ค ์์")
    print("-" * 80)
    demo = create_gradio_app()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        inbrowser=True,
    )


if __name__ == "__main__":
    main()