Spaces:
Sleeping
Sleeping
File size: 7,779 Bytes
b5b6102 3bd3f7f 6770282 8485d8a e041f9c 0755ef3 863e5cb 3bd3f7f 83bb60d dc101b2 863e5cb d88da3d 6770282 3bd3f7f 863e5cb b9e2c51 b552ffa 863e5cb dc07256 a87f48c 3bd3f7f a60c2a3 863e5cb dc07256 f0c38e0 863e5cb df48a2b 863e5cb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 |
# Standard library
import ast
import os
from collections import Counter
from collections import defaultdict
from itertools import islice

# Third-party
import gradio as gra
import gradio as gr
import pandas as pd
import spacy
import torch
from datasets import load_dataset
from transformers import BertTokenizerFast, BertForSequenceClassification
from torch.nn.functional import softmax
# Beer reviews with pre-extracted adjective/noun keyword lists, one row per review.
df = pd.read_csv('/home/user/app/pp_selected_reviews_JJ_NN.csv')

# Load the spaCy embedding model; download it on first run, then load it —
# the original downloaded inside the except block but never bound `nlp`.
try:
    nlp = spacy.load("en_core_web_lg")
except OSError:  # raised by spaCy when the model package is not installed
    os.system("python -m spacy download en_core_web_lg")
    nlp = spacy.load("en_core_web_lg")
# Load the fine-tuned BERT sentiment classifier from the HuggingFace Hub.
model = BertForSequenceClassification.from_pretrained('GiRak/beer-sentiment-bert')
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device)
model.eval()  # inference only: disable dropout for deterministic predictions
# Tokenizer matching the fine-tuned checkpoint.
tokenizer = BertTokenizerFast.from_pretrained('GiRak/beer-sentiment-bert')
def analyze_sentiment(sentence):
    """Classify one sentence with the module-level BERT model.

    Args:
        sentence: raw review text.

    Returns:
        dict mapping 'Negative'/'Positive' to their softmax probabilities.
    """
    # Tokenize; truncate so reviews longer than BERT's max length don't crash.
    inputs = tokenizer(sentence, return_tensors='pt', truncation=True)
    inputs = inputs.to(device)
    # Inference only — no_grad avoids building an autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)
    probabilities = softmax(outputs.logits, dim=1)
    # Label order matches the fine-tuned head: index 0 = Negative, 1 = Positive.
    sentiment_labels = ['Negative', 'Positive']
    return {label: probability.item()
            for label, probability in zip(sentiment_labels, probabilities[0])}
def sentiment_analysis(text):
    """Gradio entry point: return the label->probability dict for *text*."""
    return analyze_sentiment(text)
# NOTE(review): unconditional reload of the embedding model — redundant with
# the try/except load above; presumably kept so `nlp` is bound even when the
# first attempt had to download the model. TODO confirm and deduplicate.
nlp = spacy.load('en_core_web_lg')
# Compute the initial frequency of each keyword.
def get_initial_counts(keywords):
    """Return a mapping of keyword -> number of occurrences in *keywords*.

    Uses collections.Counter instead of a hand-rolled defaultdict loop;
    callers only iterate .items(), so the Counter subclass is compatible.
    """
    return Counter(keywords)
# Compare documents by their NOUN tokens only: adjectives vary with personal
# taste, while nouns name the feature itself, so grouping keys on nouns.
def custom_similarity(doc1, doc2, noun_similarity_threshold):
    """Return True if any noun in *doc1* is similar enough to a noun in *doc2*.

    "Similar enough" means token.similarity strictly exceeds
    *noun_similarity_threshold*.
    """
    nouns_a = [tok for tok in doc1 if tok.pos_ == 'NOUN']
    nouns_b = [tok for tok in doc2 if tok.pos_ == 'NOUN']
    return any(a.similarity(b) > noun_similarity_threshold
               for a in nouns_a
               for b in nouns_b)
# Merge similar keywords and record, per merged keyword, which raw keywords
# were folded into it.
def merge_keywords(keyword_counts, noun_similarity_threshold):
    """Group similar keywords and aggregate their counts.

    Args:
        keyword_counts: mapping of keyword -> occurrence count.
        noun_similarity_threshold: minimum noun-vector similarity for two
            keywords to be treated as the same topic.

    Returns:
        (merged_keywords, keyword_mappings): merged_keywords maps each
        representative keyword to its aggregated count; keyword_mappings maps
        it to the list of raw keywords merged under it.
    """
    merged_keywords = defaultdict(int)
    keyword_mappings = defaultdict(list)
    keyword_docs = {}

    # Memoization: embed each keyword with spaCy at most once.
    def get_keyword_doc(keyword):
        if keyword not in keyword_docs:
            keyword_docs[keyword] = nlp(keyword)
        return keyword_docs[keyword]

    # Walk the initial counts, comparing each keyword against the
    # representatives accumulated so far.
    for keyword, count in keyword_counts.items():
        doc1 = get_keyword_doc(keyword)
        merged_or_found_similar = False
        for merged_keyword in list(merged_keywords):
            # Exact match with an existing representative: add the count.
            # NOTE(review): dict keys are unique, so within one call this
            # branch should never fire — presumably defensive; confirm.
            if keyword == merged_keyword:
                merged_keywords[merged_keyword] += count
                merged_or_found_similar = True
                keyword_mappings[merged_keyword].append(keyword)
                break
            doc2 = get_keyword_doc(merged_keyword)
            # Similar enough: fold this keyword into that representative.
            if custom_similarity(doc1, doc2, noun_similarity_threshold):
                merged_keywords[merged_keyword] += count
                merged_or_found_similar = True
                keyword_mappings[merged_keyword].append(keyword)
                break
        # No representative matched: this keyword starts a new group.
        if not merged_or_found_similar:
            merged_keywords[keyword] = count
            keyword_mappings[keyword] = [keyword]
    return merged_keywords, keyword_mappings
def merge_similar_keywords_noun_weight(dataframe, noun_similarity_threshold=0.7):
    """Parse, count, and merge the 'Keywords' column of *dataframe*.

    Args:
        dataframe: frame with a 'Keywords' column of stringified Python lists.
        noun_similarity_threshold: forwarded to merge_keywords.

    Returns:
        (merged counts sorted by frequency desc, keyword -> raw-keyword map),
        both as plain dicts.
    """
    # Parse the stringified lists into a local Series; unlike the original,
    # this never adds/drops a temporary column on the caller's dataframe.
    keyword_lists = dataframe['Keywords'].apply(ast.literal_eval)
    # Flatten with a comprehension; Series.sum() on lists is quadratic.
    all_keywords = [kw for kws in keyword_lists for kw in kws]
    initial_counts = get_initial_counts(all_keywords)
    filtered_and_merged_keywords, merged_keyword_mappings = merge_keywords(
        initial_counts, noun_similarity_threshold)
    sorted_merged_keywords = sorted(filtered_and_merged_keywords.items(),
                                    key=lambda item: item[1], reverse=True)
    return dict(sorted_merged_keywords), dict(merged_keyword_mappings)
#########################################################################################################################
# NOTE(review): redundant — islice is already imported at the top of the file.
from itertools import islice
# Alias the review dataframe for the keyword-extraction tab below.
beer_df = df
def show_keywords(beer_name, sentiment, flag=0):
    """Summarize the top-10 merged keywords for one beer/sentiment pair.

    Args:
        beer_name: matched against the 'Beer_name' column of beer_df.
        sentiment: matched against the 'MultinomialNB_label' column.
        flag: 1 -> return only the formatted top-10 string (button callback);
              otherwise return (top-10 string, keyword mappings).
    """
    # Filter in one step and copy so downstream operations never touch
    # beer_df (the original mutated a slice in place, risking
    # SettingWithCopyWarning, and shadowed the module-level `df`).
    selected = beer_df[(beer_df['Beer_name'] == beer_name)
                       & (beer_df['MultinomialNB_label'] == sentiment)].copy()
    selected.reset_index(drop=True, inplace=True)
    keywords, mappings = merge_similar_keywords_noun_weight(selected)
    top_ten = "\n".join(f"{k}: {v}" for k, v in islice(keywords.items(), 10))
    if flag == 1:
        return top_ten
    return top_ten, mappings
def keyword_mappings(beer_name, sentiment, keyword):
    """Return the raw keywords merged under *keyword*, or a not-found message."""
    _, mappings = show_keywords(beer_name, sentiment)
    missing_message = "ν΄λΉ ν€μλμ 맀νλ λ¬Έμμ΄μ΄ μμ΅λλ€."
    return mappings.get(keyword, missing_message)
beer_names = list(beer_df['Beer_name'].unique())
sentiments = ["Positive", "Negative"]

# Keyword-extraction tab: inputs on the left, outputs on the right,
# action buttons underneath.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            beer_name_dropdown = gr.Dropdown(
                choices=beer_names,
                label="Beer_name",
                info="Choose the beer you want to see the summary keyword.",
            )
            sentiment_dropdown = gr.Dropdown(
                choices=sentiments,
                label="Sentiment",
                info="Choose Positive or Negative sentiment.",
            )
            keyword_input = gr.Textbox(
                label="Keyword",
                info="Enter a keyword to see its mappings.",
            )
        with gr.Column():
            output_keywords = gr.Textbox(label="Summary keywords (Top 10)")
            output_mapping = gr.Textbox(label="Keyword mapping")

    with gr.Row():
        with gr.Column():
            review_button = gr.Button("Review Topic")
        with gr.Column():
            mapping_button = gr.Button("Keyword mapping")

    # flag=1 so the callback returns only the string output.
    review_button.click(
        fn=lambda beer_name, sentiment: show_keywords(beer_name, sentiment, flag=1),
        inputs=[beer_name_dropdown, sentiment_dropdown],
        outputs=output_keywords,
    )
    mapping_button.click(
        keyword_mappings,
        inputs=[beer_name_dropdown, sentiment_dropdown, keyword_input],
        outputs=output_mapping,
    )
# Tab 1: free-text sentiment analysis.
app1 = gr.Interface(fn=sentiment_analysis,
                    inputs="text",
                    outputs="label",
                    live=True,
                    title="Beer Sentiment Analysis")
# Tab 2: per-beer keyword summary built above.
app2 = demo
# Group tabs 1 and 2. Uses the `gr` alias consistently — the file also
# imports gradio as `gra`, which resolves to the same module.
tabbed = gr.TabbedInterface([app1, app2],
                            ['Sentiment Analysis', 'Keyword Extraction'])
tabbed.launch()
# app1.launch()