Spaces:
Sleeping
Sleeping
YeongMin
commited on
Commit
ยท
1f53218
1
Parent(s):
8861c85
0.4v
Browse files- .claude/settings.local.json +2 -1
- app.py +168 -100
.claude/settings.local.json
CHANGED
|
@@ -4,7 +4,8 @@
|
|
| 4 |
"Bash(del:*)",
|
| 5 |
"Bash(python3:*)",
|
| 6 |
"Bash(lsof:*)",
|
| 7 |
-
"Bash(xargs kill -9)"
|
|
|
|
| 8 |
],
|
| 9 |
"deny": [],
|
| 10 |
"ask": []
|
|
|
|
| 4 |
"Bash(del:*)",
|
| 5 |
"Bash(python3:*)",
|
| 6 |
"Bash(lsof:*)",
|
| 7 |
+
"Bash(xargs kill -9)",
|
| 8 |
+
"Bash(python app.py:*)"
|
| 9 |
],
|
| 10 |
"deny": [],
|
| 11 |
"ask": []
|
app.py
CHANGED
|
@@ -359,6 +359,7 @@ class ReviewAnalyzer:
|
|
| 359 |
def extract_evidence_from_text(self, text: str, category: str) -> str:
|
| 360 |
"""
|
| 361 |
ํ
์คํธ์์ ํน์ ์นดํ
๊ณ ๋ฆฌ ๊ด๋ จ ๊ทผ๊ฑฐ ๋ฌธ์ฅ ์ถ์ถ
|
|
|
|
| 362 |
|
| 363 |
Args:
|
| 364 |
text: ๋ฆฌ๋ทฐ ํ
์คํธ
|
|
@@ -371,34 +372,41 @@ class ReviewAnalyzer:
|
|
| 371 |
|
| 372 |
# ์นดํ
๊ณ ๋ฆฌ๋ณ ํค์๋ ๋งคํ
|
| 373 |
keywords = {
|
| 374 |
-
"๋ฐฐ์ก": ["๋ฐฐ์ก", "ํ๋ฐฐ", "๋์ฐฉ", "ํฌ์ฅ"
|
| 375 |
-
"ํ์ง/๋์์ธ": ["ํ์ง", "์ฌ์ง", "ํผํผ", "๋ด๊ตฌ", "์์ฑ๋", "ํธ๋น ์ง", "๋น ์ง", "๋์์ธ", "์์", "
|
| 376 |
-
"์ฌ์ด์ฆ": ["์ฌ์ด์ฆ", "ํฌ๊ธฐ", "ํ", "์น์"
|
| 377 |
"๊ตํ/ํ๋ถ": ["๊ตํ", "ํ๋ถ", "๋ฐํ"],
|
| 378 |
"์๋น์ค": ["์๋น์ค", "๊ณ ๊ฐ์ผํฐ", "์๋", "์น์ "],
|
| 379 |
"๊ฐ๊ฒฉ": ["๊ฐ๊ฒฉ", "๊ฐ์ฑ๋น", "๋น์ธ", "์ ๋ ด", "ํ ์ธ", "๋"],
|
| 380 |
"๊ธฐ๋ฅ/์ฑ๋ฅ": ["๊ธฐ๋ฅ", "์ฑ๋ฅ", "์๋", "ํจ๊ณผ", "์ฌ์ฉ"]
|
| 381 |
}
|
| 382 |
|
| 383 |
-
|
| 384 |
-
|
|
|
|
|
|
|
| 385 |
|
| 386 |
-
#
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 396 |
|
| 397 |
return "-"
|
| 398 |
|
| 399 |
def analyze_sentiment_for_category(self, text: str, category: str) -> str:
|
| 400 |
"""
|
| 401 |
ํน์ ์นดํ
๊ณ ๋ฆฌ์ ๋ํ ๊ฐ์ ๋ถ์
|
|
|
|
| 402 |
|
| 403 |
Args:
|
| 404 |
text: ๋ฆฌ๋ทฐ ํ
์คํธ
|
|
@@ -409,11 +417,11 @@ class ReviewAnalyzer:
|
|
| 409 |
"""
|
| 410 |
import re
|
| 411 |
|
| 412 |
-
# ์นดํ
๊ณ ๋ฆฌ ๊ด๋ จ
|
| 413 |
keywords = {
|
| 414 |
-
"๋ฐฐ์ก": ["๋ฐฐ์ก", "ํ๋ฐฐ", "๋์ฐฉ", "ํฌ์ฅ"
|
| 415 |
-
"ํ์ง/๋์์ธ": ["ํ์ง", "์ฌ์ง", "ํผํผ", "๋ด๊ตฌ", "์์ฑ๋", "ํธ๋น ์ง", "๋น ์ง", "๋์์ธ", "์์", "
|
| 416 |
-
"์ฌ์ด์ฆ": ["์ฌ์ด์ฆ", "ํฌ๊ธฐ", "ํ", "์น์"
|
| 417 |
"๊ตํ/ํ๋ถ": ["๊ตํ", "ํ๋ถ", "๋ฐํ"],
|
| 418 |
"์๋น์ค": ["์๋น์ค", "๊ณ ๊ฐ์ผํฐ", "์๋", "์น์ "],
|
| 419 |
"๊ฐ๊ฒฉ": ["๊ฐ๊ฒฉ", "๊ฐ์ฑ๋น", "๋น์ธ", "์ ๋ ด", "ํ ์ธ", "๋"],
|
|
@@ -421,37 +429,108 @@ class ReviewAnalyzer:
|
|
| 421 |
}
|
| 422 |
|
| 423 |
# ๊ธ์ ํค์๋ (๋ช
์์ ๊ธ์ ํํ)
|
| 424 |
-
positive_keywords = ["์ข", "ํ๋ฅญ", "๋ง์กฑ", "์ต๊ณ ", "์์", "์ด์", "๋ฑ๋ง", "๋น ๋ฅด", "๊ด์ฐฎ"]
|
| 425 |
|
| 426 |
# ๋ถ์ ํค์๋
|
| 427 |
-
negative_keywords = ["๋ณ๋ก", "์์ฝ", "์ค๋ง", "์ต์
", "์ง์ฆ", "๋ฌธ์ "]
|
| 428 |
|
| 429 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 430 |
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
# ๋ถ์ ํค์๋ ์ฒดํฌ
|
| 448 |
-
for neg_keyword in negative_keywords:
|
| 449 |
-
if neg_keyword in sentence:
|
| 450 |
-
return "๋ถ์ "
|
| 451 |
|
| 452 |
# ๊ธฐ๋ณธ๊ฐ์ ์ค๋ฆฝ
|
| 453 |
return "์ค๋ฆฝ"
|
| 454 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 455 |
def generate_comprehensive_analysis(self, review_text: str, analysis_result: Dict) -> Dict:
|
| 456 |
"""
|
| 457 |
์ข
ํฉ ๋ถ์ ์์ฑ - ํญ๋ชฉ๋ณ ํ๊ฐ ๋ฐ ์์ฝ
|
|
@@ -465,14 +544,20 @@ class ReviewAnalyzer:
|
|
| 465 |
"""
|
| 466 |
sentiment = analysis_result['sentiment']['sentiment']
|
| 467 |
sentiment_scores = analysis_result['sentiment']['scores']
|
| 468 |
-
categories = analysis_result['categories']['main_categories']
|
| 469 |
tone = analysis_result['tone']['tone']
|
| 470 |
|
|
|
|
|
|
|
|
|
|
| 471 |
# ํญ๋ชฉ๋ณ ๏ฟฝ๏ฟฝ๊ฐ
|
| 472 |
item_ratings = []
|
| 473 |
-
for
|
| 474 |
-
|
| 475 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 476 |
|
| 477 |
# ํด๋น ์นดํ
๊ณ ๋ฆฌ์ ๊ฐ์ ๋ถ์
|
| 478 |
category_sentiment = self.analyze_sentiment_for_category(review_text, category)
|
|
@@ -481,66 +566,36 @@ class ReviewAnalyzer:
|
|
| 481 |
if category_sentiment == "๋ถ์ ":
|
| 482 |
rating = 2
|
| 483 |
elif category_sentiment == "๊ธ์ ":
|
| 484 |
-
rating =
|
| 485 |
else:
|
| 486 |
rating = 3
|
| 487 |
|
| 488 |
-
# ๊ทผ๊ฑฐ ์ถ์ถ
|
| 489 |
-
evidence = self.extract_evidence_from_text(review_text, category)
|
| 490 |
-
|
| 491 |
item_ratings.append({
|
| 492 |
"category": category,
|
| 493 |
"rating": rating,
|
| 494 |
"evidence": evidence,
|
| 495 |
-
"
|
| 496 |
})
|
| 497 |
|
| 498 |
-
# ์ฌ๊ตฌ๋งค ์ํฅ ์ถ์
|
| 499 |
-
repurchase_score = 3 # ๊ธฐ๋ณธ๊ฐ
|
| 500 |
-
if sentiment == "๊ธ์ ":
|
| 501 |
-
repurchase_score = 4
|
| 502 |
-
if sentiment_scores['๊ธ์ '] > 70:
|
| 503 |
-
repurchase_score = 5
|
| 504 |
-
elif sentiment == "๋ถ์ ":
|
| 505 |
-
repurchase_score = 2
|
| 506 |
-
if sentiment_scores['๋ถ์ '] > 70:
|
| 507 |
-
repurchase_score = 1
|
| 508 |
-
else:
|
| 509 |
-
repurchase_score = 3
|
| 510 |
-
|
| 511 |
-
# ์ฌ๊ตฌ๋งค ์ํฅ ๊ทผ๊ฑฐ
|
| 512 |
-
repurchase_keywords = ["๋", "๋ค์", "์ฌ๊ตฌ๋งค", "์ถ์ฒ", "ํ๋ถ", "์ต์
"]
|
| 513 |
-
repurchase_evidence = "-"
|
| 514 |
-
for keyword in repurchase_keywords:
|
| 515 |
-
if keyword in review_text:
|
| 516 |
-
import re
|
| 517 |
-
sentences = re.split(r'[.!?~]+\s*', review_text)
|
| 518 |
-
for sentence in sentences:
|
| 519 |
-
if keyword in sentence and len(sentence.strip()) > 5:
|
| 520 |
-
repurchase_evidence = f'"{sentence.strip()[:40]}"'
|
| 521 |
-
break
|
| 522 |
-
if repurchase_evidence != "-":
|
| 523 |
-
break
|
| 524 |
-
|
| 525 |
# ์ ์ฒด ํค ๋น์จ
|
| 526 |
positive_ratio = sentiment_scores.get('๊ธ์ ', 0)
|
| 527 |
negative_ratio = sentiment_scores.get('๋ถ์ ', 0)
|
| 528 |
neutral_ratio = sentiment_scores.get('์ค๋ฆฝ', 0)
|
| 529 |
|
|
|
|
|
|
|
|
|
|
| 530 |
# ์์ฝ ๋ฌธ์ฅ ์์ฑ
|
| 531 |
summary = self.generate_summary_sentence(review_text, item_ratings, sentiment)
|
| 532 |
|
| 533 |
return {
|
| 534 |
"item_ratings": item_ratings,
|
| 535 |
-
"repurchase": {
|
| 536 |
-
"rating": repurchase_score,
|
| 537 |
-
"evidence": repurchase_evidence
|
| 538 |
-
},
|
| 539 |
"tone_ratio": {
|
| 540 |
"positive": round(positive_ratio),
|
| 541 |
"negative": round(negative_ratio),
|
| 542 |
"neutral": round(neutral_ratio)
|
| 543 |
},
|
|
|
|
| 544 |
"summary": summary,
|
| 545 |
"overall_sentiment": sentiment
|
| 546 |
}
|
|
@@ -758,28 +813,31 @@ class ReviewAnalyzer:
|
|
| 758 |
๋งํฌ๋ค์ด ํ์์ ๋ฌธ์์ด
|
| 759 |
"""
|
| 760 |
output = "## โ๏ธ ์ข
ํฉ ๋ถ์\n\n"
|
| 761 |
-
output += "| ํญ๋ชฉ |
|
| 762 |
-
output += "
|
| 763 |
|
| 764 |
# ํญ๋ชฉ๋ณ ํ๊ฐ
|
| 765 |
for item in comprehensive['item_ratings']:
|
| 766 |
stars = "โญ๏ธ" * item['rating']
|
| 767 |
-
|
| 768 |
-
|
| 769 |
-
# ์ฌ๊ตฌ๋งค ์ํฅ
|
| 770 |
-
repurchase_stars = "โญ๏ธ" * comprehensive['repurchase']['rating']
|
| 771 |
-
output += f"| ์ฌ๊ตฌ๋งค ์ํฅ | {repurchase_stars} | {comprehensive['repurchase']['evidence']} |\n"
|
| 772 |
|
| 773 |
# ์ ์ฒด ํค
|
| 774 |
tone_ratio = comprehensive['tone_ratio']
|
| 775 |
-
|
| 776 |
|
|
|
|
| 777 |
if tone_ratio['positive'] > tone_ratio['negative'] + 20:
|
| 778 |
-
|
| 779 |
elif tone_ratio['negative'] > tone_ratio['positive'] + 20:
|
| 780 |
-
|
| 781 |
else:
|
| 782 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 783 |
|
| 784 |
# ์์ฝ ๋ฌธ์ฅ
|
| 785 |
output += f"\n## ๐ก ์์ฝ ๋ฌธ์ฅ\n\n"
|
|
@@ -815,6 +873,7 @@ def create_gradio_app():
|
|
| 815 |
["๋ฐฐ์ก์ด ์๊ฐ๋ณด๋ค ๋นจ๋ผ์ ์ข์์ด์. ํ์ง๋ ๊ด์ฐฎ๊ณ ๊ฐ๊ฒฉ๋๋น ๋ง์กฑํฉ๋๋ค."],
|
| 816 |
["์ฌ์ด์ฆ๊ฐ ๋๋ฌด ์์์. ๊ตํํ๋ ค๊ณ ํ๋๋ฐ ์ ์ฐจ๊ฐ ๋ณต์กํ๋ค์."],
|
| 817 |
["๋์์ธ์ ์์๋ฐ ํ์ง์ด ๊ฐ๊ฒฉ์ ๋นํด ๋ณ๋ก์
๋๋ค. ๊ทธ๋ฅ์ ๋ฅ์ด์์."],
|
|
|
|
| 818 |
]
|
| 819 |
|
| 820 |
# Gradio ์ธํฐํ์ด์ค ์์ฑ - ๋ชจ๋ ๋์๋ณด๋ ๋ ์ด์์
|
|
@@ -935,14 +994,23 @@ def create_gradio_app():
|
|
| 935 |
show_label=True
|
| 936 |
)
|
| 937 |
|
| 938 |
-
|
|
|
|
|
|
|
|
|
|
| 939 |
|
| 940 |
-
|
| 941 |
-
|
| 942 |
-
|
| 943 |
-
|
| 944 |
-
|
| 945 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 946 |
|
| 947 |
# ์ด๋ฒคํธ ํธ๋ค๋ฌ
|
| 948 |
submit_btn.click(
|
|
|
|
| 359 |
def extract_evidence_from_text(self, text: str, category: str) -> str:
|
| 360 |
"""
|
| 361 |
ํ
์คํธ์์ ํน์ ์นดํ
๊ณ ๋ฆฌ ๊ด๋ จ ๊ทผ๊ฑฐ ๋ฌธ์ฅ ์ถ์ถ
|
| 362 |
+
์นดํ
๊ณ ๋ฆฌ ํค์๋๊ฐ ํฌํจ๋ ์กฐ๊ฐ๋ง ์ถ์ถํฉ๋๋ค.
|
| 363 |
|
| 364 |
Args:
|
| 365 |
text: ๋ฆฌ๋ทฐ ํ
์คํธ
|
|
|
|
| 372 |
|
| 373 |
# ์นดํ
๊ณ ๋ฆฌ๋ณ ํค์๋ ๋งคํ
|
| 374 |
keywords = {
|
| 375 |
+
"๋ฐฐ์ก": ["๋ฐฐ์ก", "ํ๋ฐฐ", "๋์ฐฉ", "ํฌ์ฅ"],
|
| 376 |
+
"ํ์ง/๋์์ธ": ["ํ์ง", "์ฌ์ง", "ํผํผ", "๋ด๊ตฌ", "์์ฑ๋", "ํธ๋น ์ง", "๋น ์ง", "๋์์ธ", "์์", "์คํ์ผ", "์ธ๊ด"],
|
| 377 |
+
"์ฌ์ด์ฆ": ["์ฌ์ด์ฆ", "ํฌ๊ธฐ", "ํ", "์น์"],
|
| 378 |
"๊ตํ/ํ๋ถ": ["๊ตํ", "ํ๋ถ", "๋ฐํ"],
|
| 379 |
"์๋น์ค": ["์๋น์ค", "๊ณ ๊ฐ์ผํฐ", "์๋", "์น์ "],
|
| 380 |
"๊ฐ๊ฒฉ": ["๊ฐ๊ฒฉ", "๊ฐ์ฑ๋น", "๋น์ธ", "์ ๋ ด", "ํ ์ธ", "๋"],
|
| 381 |
"๊ธฐ๋ฅ/์ฑ๋ฅ": ["๊ธฐ๋ฅ", "์ฑ๋ฅ", "์๋", "ํจ๊ณผ", "์ฌ์ฉ"]
|
| 382 |
}
|
| 383 |
|
| 384 |
+
if category not in keywords:
|
| 385 |
+
return "-"
|
| 386 |
+
|
| 387 |
+
category_keywords = keywords[category]
|
| 388 |
|
| 389 |
+
# ์ ์ฒด ํ
์คํธ๋ฅผ ์กฐ๊ฐ์ผ๋ก ๋๋๊ธฐ (์ผํ, ๊ทธ๋ฆฌ๊ณ , ํ์ง๋ง ๋ฑ์ผ๋ก ๋ถ๋ฆฌ)
|
| 390 |
+
# ์: "๋ฐฐ์ก๋ ๋น ๋ฅด๊ณ ํ์ง๋ ํ๋ฅญํฉ๋๋ค" -> ["๋ฐฐ์ก๋ ๋น ๋ฅด๊ณ ", "ํ์ง๋ ํ๋ฅญํฉ๋๋ค"]
|
| 391 |
+
chunks = re.split(r'[,]|\s+๊ทธ๋ฆฌ๊ณ \s+|\s+๊ทผ๋ฐ\s+|\s+ํ์ง๋ง\s+|\s+์ธ๋ฐ\s+', text)
|
| 392 |
+
|
| 393 |
+
for chunk in chunks:
|
| 394 |
+
chunk = chunk.strip()
|
| 395 |
+
# ์ด ์กฐ๊ฐ์ ์นดํ
๊ณ ๋ฆฌ ํค์๋๊ฐ ์๋์ง ํ์ธ
|
| 396 |
+
for keyword in category_keywords:
|
| 397 |
+
if keyword in chunk and len(chunk) > 5:
|
| 398 |
+
# chunk๋ฅผ ๊ทธ๋๋ก ์ฌ์ฉ (์ด๋ฏธ ์กฐ๊ฐ์ผ๋ก ๋ถ๋ฆฌ๋์ด ์์ผ๋ฏ๋ก)
|
| 399 |
+
# ๋จ, ๋๋ฌด ๊ธด ๊ฒฝ์ฐ๋ง ์๋ผ๋ด๊ธฐ
|
| 400 |
+
if len(chunk) > 20:
|
| 401 |
+
chunk = chunk[:20]
|
| 402 |
+
return f'"{chunk}"'
|
| 403 |
|
| 404 |
return "-"
|
| 405 |
|
| 406 |
def analyze_sentiment_for_category(self, text: str, category: str) -> str:
|
| 407 |
"""
|
| 408 |
ํน์ ์นดํ
๊ณ ๋ฆฌ์ ๋ํ ๊ฐ์ ๋ถ์
|
| 409 |
+
์นดํ
๊ณ ๋ฆฌ ํค์๋ ๊ทผ์ฒ์ ๊ฐ์ ํํ๋ง ๋ถ์ํฉ๋๋ค.
|
| 410 |
|
| 411 |
Args:
|
| 412 |
text: ๋ฆฌ๋ทฐ ํ
์คํธ
|
|
|
|
| 417 |
"""
|
| 418 |
import re
|
| 419 |
|
| 420 |
+
# ์นดํ
๊ณ ๋ฆฌ ๊ด๋ จ ํค์๋
|
| 421 |
keywords = {
|
| 422 |
+
"๋ฐฐ์ก": ["๋ฐฐ์ก", "ํ๋ฐฐ", "๋์ฐฉ", "ํฌ์ฅ"],
|
| 423 |
+
"ํ์ง/๋์์ธ": ["ํ์ง", "์ฌ์ง", "ํผํผ", "๋ด๊ตฌ", "์์ฑ๋", "ํธ๋น ์ง", "๋น ์ง", "๋์์ธ", "์์", "์คํ์ผ", "์ธ๊ด"],
|
| 424 |
+
"์ฌ์ด์ฆ": ["์ฌ์ด์ฆ", "ํฌ๊ธฐ", "ํ", "์น์"],
|
| 425 |
"๊ตํ/ํ๋ถ": ["๊ตํ", "ํ๋ถ", "๋ฐํ"],
|
| 426 |
"์๋น์ค": ["์๋น์ค", "๊ณ ๊ฐ์ผํฐ", "์๋", "์น์ "],
|
| 427 |
"๊ฐ๊ฒฉ": ["๊ฐ๊ฒฉ", "๊ฐ์ฑ๋น", "๋น์ธ", "์ ๋ ด", "ํ ์ธ", "๋"],
|
|
|
|
| 429 |
}
|
| 430 |
|
| 431 |
# ๊ธ์ ํค์๋ (๋ช
์์ ๊ธ์ ํํ)
|
| 432 |
+
positive_keywords = ["์ข", "ํ๋ฅญ", "๋ง์กฑ", "์ต๊ณ ", "์์", "์ด์", "๋ฑ๋ง", "๋น ๋ฅด", "๊ด์ฐฎ", "์๋ฒฝ", "๋ฉ์ง", "๊ฐ์ฌ"]
|
| 433 |
|
| 434 |
# ๋ถ์ ํค์๋
|
| 435 |
+
negative_keywords = ["๋ณ๋ก", "์์ฝ", "์ค๋ง", "์ต์
", "์ง์ฆ", "๋ฌธ์ ", "๋์", "ํํธ์", "์๋ง", "ํํ"]
|
| 436 |
|
| 437 |
+
if category not in keywords:
|
| 438 |
+
return "์ค๋ฆฝ"
|
| 439 |
+
|
| 440 |
+
# ์นดํ
๊ณ ๋ฆฌ ํค์๋๊ฐ ํฌํจ๋ ๊ตฌ๊ฐ ์ฐพ๊ธฐ
|
| 441 |
+
category_keywords = keywords[category]
|
| 442 |
+
|
| 443 |
+
# ์ ์ฒด ํ
์คํธ๋ฅผ ์กฐ๊ฐ์ผ๋ก ๋๋๊ธฐ (์ผํ, ๊ทธ๋ฆฌ๊ณ , ํ์ง๋ง ๋ฑ์ผ๋ก ๋ถ๋ฆฌ)
|
| 444 |
+
# ์: "๋ฐฐ์ก์ ๋น ๋ฅธ๋ฐ ํ์ง์ด ๋ณ๋ก์์" -> ["๋ฐฐ์ก์ ๋น ๋ฅธ๋ฐ", "ํ์ง์ด ๋ณ๋ก์์"]
|
| 445 |
+
chunks = re.split(r'[,]|\s+๊ทธ๋ฆฌ๊ณ \s+|\s+๊ทผ๋ฐ\s+|\s+ํ์ง๋ง\s+|\s+์ธ๋ฐ\s+', text)
|
| 446 |
+
|
| 447 |
+
for chunk in chunks:
|
| 448 |
+
# ์ด ์กฐ๊ฐ์ ์นดํ
๊ณ ๋ฆฌ ํค์๋๊ฐ ์๋์ง ํ์ธ
|
| 449 |
+
has_category = False
|
| 450 |
+
for keyword in category_keywords:
|
| 451 |
+
if keyword in chunk:
|
| 452 |
+
has_category = True
|
| 453 |
+
break
|
| 454 |
|
| 455 |
+
if not has_category:
|
| 456 |
+
continue
|
| 457 |
+
|
| 458 |
+
# ์ด ์กฐ๊ฐ ๋ด์์๋ง ๊ฐ์ ํ๋จ
|
| 459 |
+
chunk_lower = chunk.lower()
|
| 460 |
+
|
| 461 |
+
# ๊ธ์ ํค์๋ ์ฒดํฌ
|
| 462 |
+
for pos_keyword in positive_keywords:
|
| 463 |
+
if pos_keyword in chunk_lower:
|
| 464 |
+
return "๊ธ์ "
|
| 465 |
+
|
| 466 |
+
# ๋ถ์ ํค์๋ ์ฒดํฌ
|
| 467 |
+
for neg_keyword in negative_keywords:
|
| 468 |
+
if neg_keyword in chunk_lower:
|
| 469 |
+
return "๋ถ์ "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 470 |
|
| 471 |
# ๊ธฐ๋ณธ๊ฐ์ ์ค๋ฆฝ
|
| 472 |
return "์ค๋ฆฝ"
|
| 473 |
|
| 474 |
+
def extract_tone_evidence(self, text: str) -> Dict[str, str]:
|
| 475 |
+
"""
|
| 476 |
+
์ ์ฒด ํค์ ๊ธ์ /๋ถ์ ๊ทผ๊ฑฐ ์ถ์ถ
|
| 477 |
+
|
| 478 |
+
Args:
|
| 479 |
+
text: ๋ฆฌ๋ทฐ ํ
์คํธ
|
| 480 |
+
|
| 481 |
+
Returns:
|
| 482 |
+
{"positive": "๊ธ์ ๊ทผ๊ฑฐ", "negative": "๋ถ์ ๊ทผ๊ฑฐ"}
|
| 483 |
+
"""
|
| 484 |
+
import re
|
| 485 |
+
|
| 486 |
+
# ๊ธ์ ํค์๋
|
| 487 |
+
positive_keywords = ["์ข", "ํ๋ฅญ", "๋ง์กฑ", "์ต๊ณ ", "์์", "์ด์", "๋ฑ๋ง", "๋น ๋ฅด", "๊ด์ฐฎ", "์๋ฒฝ", "๋ฉ์ง", "๊ฐ์ฌ"]
|
| 488 |
+
|
| 489 |
+
# ๋ถ์ ํค์๋
|
| 490 |
+
negative_keywords = ["๋ณ๋ก", "์์ฝ", "์ค๋ง", "์ต์
", "์ง์ฆ", "๋ฌธ์ ", "๋์", "ํํธ์", "์๋ง", "ํํ", "๋ค๋ฅด"]
|
| 491 |
+
|
| 492 |
+
# ํ
์คํธ๋ฅผ ์กฐ๊ฐ์ผ๋ก ๋๋๊ธฐ
|
| 493 |
+
chunks = re.split(r'[,.]|\s+๊ทธ๋ฆฌ๊ณ \s+|\s+๊ทผ๋ฐ\s+|\s+ํ์ง๋ง\s+|\s+์ธ๋ฐ\s+', text)
|
| 494 |
+
|
| 495 |
+
positive_evidence = []
|
| 496 |
+
negative_evidence = []
|
| 497 |
+
|
| 498 |
+
for chunk in chunks:
|
| 499 |
+
chunk = chunk.strip()
|
| 500 |
+
if len(chunk) < 3:
|
| 501 |
+
continue
|
| 502 |
+
|
| 503 |
+
chunk_lower = chunk.lower()
|
| 504 |
+
|
| 505 |
+
# ๊ธ์ ํค์๋ ์ฒดํฌ - chunk ๊ทธ๋๋ก ์ฌ์ฉ
|
| 506 |
+
for keyword in positive_keywords:
|
| 507 |
+
if keyword in chunk_lower:
|
| 508 |
+
# chunk๋ฅผ ๊ทธ๋๋ก ์ฌ์ฉ (์ด๋ฏธ ์กฐ๊ฐ์ผ๋ก ๋ถ๋ฆฌ๋์ด ์์ผ๋ฏ๋ก)
|
| 509 |
+
evidence = chunk
|
| 510 |
+
if len(evidence) > 20:
|
| 511 |
+
evidence = evidence[:20]
|
| 512 |
+
positive_evidence.append(f'"{evidence}"')
|
| 513 |
+
break
|
| 514 |
+
|
| 515 |
+
# ๋ถ์ ํค์๋ ์ฒดํฌ - chunk ๊ทธ๋๋ก ์ฌ์ฉ
|
| 516 |
+
for keyword in negative_keywords:
|
| 517 |
+
if keyword in chunk_lower:
|
| 518 |
+
# chunk๋ฅผ ๊ทธ๋๋ก ์ฌ์ฉ (์ด๋ฏธ ์กฐ๊ฐ์ผ๋ก ๋ถ๋ฆฌ๋์ด ์์ผ๋ฏ๋ก)
|
| 519 |
+
evidence = chunk
|
| 520 |
+
if len(evidence) > 20:
|
| 521 |
+
evidence = evidence[:20]
|
| 522 |
+
negative_evidence.append(f'"{evidence}"')
|
| 523 |
+
break
|
| 524 |
+
|
| 525 |
+
# ์ต๋ 2๊ฐ์ฉ๋ง ํ์
|
| 526 |
+
positive_text = ", ".join(positive_evidence[:2]) if positive_evidence else "-"
|
| 527 |
+
negative_text = ", ".join(negative_evidence[:2]) if negative_evidence else "-"
|
| 528 |
+
|
| 529 |
+
return {
|
| 530 |
+
"positive": positive_text,
|
| 531 |
+
"negative": negative_text
|
| 532 |
+
}
|
| 533 |
+
|
| 534 |
def generate_comprehensive_analysis(self, review_text: str, analysis_result: Dict) -> Dict:
|
| 535 |
"""
|
| 536 |
์ข
ํฉ ๋ถ์ ์์ฑ - ํญ๋ชฉ๋ณ ํ๊ฐ ๋ฐ ์์ฝ
|
|
|
|
| 544 |
"""
|
| 545 |
sentiment = analysis_result['sentiment']['sentiment']
|
| 546 |
sentiment_scores = analysis_result['sentiment']['scores']
|
|
|
|
| 547 |
tone = analysis_result['tone']['tone']
|
| 548 |
|
| 549 |
+
# ๋ชจ๋ ๊ฐ๋ฅํ ์นดํ
๊ณ ๋ฆฌ๋ฅผ ๊ฒ์ฌ (AI ๊ฒฐ๊ณผ์ ๋ฌด๊ดํ๊ฒ)
|
| 550 |
+
all_possible_categories = ["๋ฐฐ์ก", "ํ์ง/๋์์ธ", "์ฌ์ด์ฆ", "๊ตํ/ํ๋ถ", "์๋น์ค", "๊ฐ๊ฒฉ", "๊ธฐ๋ฅ/์ฑ๋ฅ"]
|
| 551 |
+
|
| 552 |
# ํญ๋ชฉ๋ณ ๏ฟฝ๏ฟฝ๊ฐ
|
| 553 |
item_ratings = []
|
| 554 |
+
for category in all_possible_categories:
|
| 555 |
+
# ๊ทผ๊ฑฐ ์ถ์ถ
|
| 556 |
+
evidence = self.extract_evidence_from_text(review_text, category)
|
| 557 |
+
|
| 558 |
+
# ๊ทผ๊ฑฐ๊ฐ ์์ผ๋ฉด ํด๋น ํญ๋ชฉ ์ ์ธ
|
| 559 |
+
if evidence == "-":
|
| 560 |
+
continue
|
| 561 |
|
| 562 |
# ํด๋น ์นดํ
๊ณ ๋ฆฌ์ ๊ฐ์ ๋ถ์
|
| 563 |
category_sentiment = self.analyze_sentiment_for_category(review_text, category)
|
|
|
|
| 566 |
if category_sentiment == "๋ถ์ ":
|
| 567 |
rating = 2
|
| 568 |
elif category_sentiment == "๊ธ์ ":
|
| 569 |
+
rating = 5
|
| 570 |
else:
|
| 571 |
rating = 3
|
| 572 |
|
|
|
|
|
|
|
|
|
|
| 573 |
item_ratings.append({
|
| 574 |
"category": category,
|
| 575 |
"rating": rating,
|
| 576 |
"evidence": evidence,
|
| 577 |
+
"sentiment": category_sentiment
|
| 578 |
})
|
| 579 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 580 |
# ์ ์ฒด ํค ๋น์จ
|
| 581 |
positive_ratio = sentiment_scores.get('๊ธ์ ', 0)
|
| 582 |
negative_ratio = sentiment_scores.get('๋ถ์ ', 0)
|
| 583 |
neutral_ratio = sentiment_scores.get('์ค๋ฆฝ', 0)
|
| 584 |
|
| 585 |
+
# ์ ์ฒด ํค ๊ทผ๊ฑฐ ์ถ์ถ
|
| 586 |
+
tone_evidence = self.extract_tone_evidence(review_text)
|
| 587 |
+
|
| 588 |
# ์์ฝ ๋ฌธ์ฅ ์์ฑ
|
| 589 |
summary = self.generate_summary_sentence(review_text, item_ratings, sentiment)
|
| 590 |
|
| 591 |
return {
|
| 592 |
"item_ratings": item_ratings,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 593 |
"tone_ratio": {
|
| 594 |
"positive": round(positive_ratio),
|
| 595 |
"negative": round(negative_ratio),
|
| 596 |
"neutral": round(neutral_ratio)
|
| 597 |
},
|
| 598 |
+
"tone_evidence": tone_evidence,
|
| 599 |
"summary": summary,
|
| 600 |
"overall_sentiment": sentiment
|
| 601 |
}
|
|
|
|
| 813 |
๋งํฌ๋ค์ด ํ์์ ๋ฌธ์์ด
|
| 814 |
"""
|
| 815 |
output = "## โ๏ธ ์ข
ํฉ ๋ถ์\n\n"
|
| 816 |
+
output += "| ํญ๋ชฉ | ๊ฐ์ | ๋ง์กฑ๋ | ๊ทผ๊ฑฐ |\n"
|
| 817 |
+
output += "|------|------|--------|------|\n"
|
| 818 |
|
| 819 |
# ํญ๋ชฉ๋ณ ํ๊ฐ
|
| 820 |
for item in comprehensive['item_ratings']:
|
| 821 |
stars = "โญ๏ธ" * item['rating']
|
| 822 |
+
sentiment = item.get('sentiment', '์ค๋ฆฝ')
|
| 823 |
+
output += f"| {item['category']} | {sentiment} | {stars} | {item['evidence']} |\n"
|
|
|
|
|
|
|
|
|
|
| 824 |
|
| 825 |
# ์ ์ฒด ํค
|
| 826 |
tone_ratio = comprehensive['tone_ratio']
|
| 827 |
+
tone_evidence = comprehensive.get('tone_evidence', {"positive": "-", "negative": "-"})
|
| 828 |
|
| 829 |
+
tone_summary = ""
|
| 830 |
if tone_ratio['positive'] > tone_ratio['negative'] + 20:
|
| 831 |
+
tone_summary = "๊ธ์ ์ด ์ฐ์ธํจ"
|
| 832 |
elif tone_ratio['negative'] > tone_ratio['positive'] + 20:
|
| 833 |
+
tone_summary = "๋ถ์ ์ด ์ฐ์ธํจ"
|
| 834 |
else:
|
| 835 |
+
tone_summary = "๊ธ์ ๊ณผ ๋ถ์ ์ด ํผ์ฌ๋จ"
|
| 836 |
+
|
| 837 |
+
# ์ ์ฒด ํค ๊ทผ๊ฑฐ ํฌ๋งทํ
: "๊ธ์ : xxx / ๋ถ์ : xxx"
|
| 838 |
+
tone_evidence_text = f"๊ธ์ : {tone_evidence['positive']} / ๋ถ์ : {tone_evidence['negative']}"
|
| 839 |
+
|
| 840 |
+
output += f"| ์ ์ฒด ํค | {tone_summary} | ๊ธ์ {tone_ratio['positive']} : ๋ถ์ {tone_ratio['negative']} | {tone_evidence_text} |\n"
|
| 841 |
|
| 842 |
# ์์ฝ ๋ฌธ์ฅ
|
| 843 |
output += f"\n## ๐ก ์์ฝ ๋ฌธ์ฅ\n\n"
|
|
|
|
| 873 |
["๋ฐฐ์ก์ด ์๊ฐ๋ณด๋ค ๋นจ๋ผ์ ์ข์์ด์. ํ์ง๋ ๊ด์ฐฎ๊ณ ๊ฐ๊ฒฉ๋๋น ๋ง์กฑํฉ๋๋ค."],
|
| 874 |
["์ฌ์ด์ฆ๊ฐ ๋๋ฌด ์์์. ๊ตํํ๋ ค๊ณ ํ๋๋ฐ ์ ์ฐจ๊ฐ ๋ณต์กํ๋ค์."],
|
| 875 |
["๋์์ธ์ ์์๋ฐ ํ์ง์ด ๊ฐ๊ฒฉ์ ๋นํด ๋ณ๋ก์
๋๋ค. ๊ทธ๋ฅ์ ๋ฅ์ด์์."],
|
| 876 |
+
["์ธํธ ๊ฐ๊ฒฉ ๊ฐ์ฑ๋น ์ต๊ณ ์์ฉโค๏ธ๐คใ๋ฐ๋ปํ๊ณ ํญ๋ฅํญ๋ฅํ ๋๋ ๋๋ฌด ์กฐ์์ฌ!! ํ ๋๋ฌด ์๋ป์ฉ!!!"]
|
| 877 |
]
|
| 878 |
|
| 879 |
# Gradio ์ธํฐํ์ด์ค ์์ฑ - ๋ชจ๋ ๋์๋ณด๋ ๋ ์ด์์
|
|
|
|
| 994 |
show_label=True
|
| 995 |
)
|
| 996 |
|
| 997 |
+
# ์ข
ํฉ ๋ถ์ - ์ ์ฒด ๋๋น
|
| 998 |
+
gr.HTML('<div class="card-header sentiment-positive">โ๏ธ ์ข
ํฉ ๋ถ์ & ์ธ์ฌ์ดํธ</div>')
|
| 999 |
+
comprehensive_output = gr.Markdown(
|
| 1000 |
+
value="""## โ๏ธ ์ข
ํฉ ๋ถ์
|
| 1001 |
|
| 1002 |
+
| ํญ๋ชฉ | ๊ฐ์ | ๋ง์กฑ๋ | ๊ทผ๊ฑฐ |
|
| 1003 |
+
|------|------|--------|------|
|
| 1004 |
+
| - | - | - | - |
|
| 1005 |
+
|
| 1006 |
+
| ์ ์ฒด ํค | - | - | - |
|
| 1007 |
+
|
| 1008 |
+
## ๐ก ์์ฝ ๋ฌธ์ฅ
|
| 1009 |
+
|
| 1010 |
+
**"๋ฆฌ๋ทฐ๋ฅผ ์
๋ ฅํ๊ณ ๋ถ์์ ์์ํ์ธ์"**
|
| 1011 |
+
""",
|
| 1012 |
+
show_label=False
|
| 1013 |
+
)
|
| 1014 |
|
| 1015 |
# ์ด๋ฒคํธ ํธ๋ค๋ฌ
|
| 1016 |
submit_btn.click(
|