# Hugging Face Space: Gradio demo for aspect and emotion extraction from reviews.
import gradio as gr | |
import json | |
import re | |
import html | |
import os | |
import pandas as pd | |
import re | |
# models | |
from transformers import AutoTokenizer, AutoModelForTokenClassification,AutoModelForSequenceClassification, pipeline | |
import torch | |
# --- Aspect-term extraction (token classification) --------------------------
# The aspect pipeline is pinned to the CPU.
aspdevice = "cpu"
model_id_ate = "gauneg/roberta-base-absa-ate-sentiment"
tokenizer_ate = AutoTokenizer.from_pretrained(model_id_ate)
model_ate = AutoModelForTokenClassification.from_pretrained(model_id_ate)
# aggregation_strategy='simple' makes the pipeline return grouped spans; the
# rest of this file reads their "word", "start", "end", "entity_group" and
# "score" fields.
senti_pipeline = pipeline(
    task='ner',
    model=model_ate,
    tokenizer=tokenizer_ate,
    device=aspdevice,
    aggregation_strategy='simple',
)

# --- Emotion detection (sequence classification) ----------------------------
# Device preference: Apple MPS first, then the first CUDA device, else CPU (-1).
if torch.backends.mps.is_available():
    emodevice = "mps"
elif torch.cuda.is_available():
    emodevice = 0
else:
    emodevice = -1
emotion_model = "j-hartmann/emotion-english-distilroberta-base"
emo_tokenizer = AutoTokenizer.from_pretrained(emotion_model)
emo_model = AutoModelForSequenceClassification.from_pretrained(emotion_model)
# top_k=None requests a score for every label rather than only the best one.
classifier = pipeline(
    "text-classification",
    model=emo_model,
    tokenizer=emo_tokenizer,
    top_k=None,
    device=emodevice,
)
def extract_full_word(text, start, end):
    """Expand the span ``text[start:end]`` outwards to whole-word boundaries.

    A model may report a span that covers only part of a word (for example a
    sub-word piece). This grows the span left and right while the neighbouring
    character is a regex word character (``\\w``) and returns the result.

    Args:
        text: The full string the offsets refer to.
        start: Start offset of the span (inclusive).
        end: End offset of the span (exclusive).

    Returns:
        The expanded substring with surrounding whitespace stripped. Offsets
        outside ``text`` are clamped instead of raising ``IndexError``.
    """
    length = len(text)
    # Clamp both offsets into [0, len(text)] so bad offsets cannot index past
    # the end of the string.
    word_start = max(0, min(start, length))
    word_end = max(word_start, min(end, length))

    # Drop whitespace at the edges of the span before expanding; otherwise a
    # span that begins with a space would grow leftwards into the previous word.
    while word_start < word_end and text[word_start].isspace():
        word_start += 1
    while word_end > word_start and text[word_end - 1].isspace():
        word_end -= 1

    # Grow left, then right, while the adjacent character is a word character.
    while word_start > 0 and re.match(r'\w', text[word_start - 1]):
        word_start -= 1
    while word_end < length and re.match(r'\w', text[word_end]):
        word_end += 1
    return text[word_start:word_end].strip()
def extract_full_analysis(review):
    """Run aspect and emotion extraction on one review.

    The aspect pipeline is run on the whole review; the emotion classifier is
    run on each non-empty piece obtained by splitting the review on ".".

    Args:
        review: The review text.

    Returns:
        A dict with these keys:
          "review":       the input text, unchanged.
          "aspect_words": unique aspect terms, each expanded to a whole word
                          and at least three characters long, in first-seen
                          order.
          "sentiment":    the ``entity_group`` of the prediction that produced
                          each kept aspect (same length/order as
                          "aspect_words").
          "score":        that prediction's score formatted to four decimals
                          (same length/order as "aspect_words").
          "emotions":     emotion labels whose score is >= 0.5, across all
                          sentences.
          "emo-score":    the raw scores matching "emotions".
    """
    sentences = [part.strip() for part in review.split(".") if part.strip()]

    # Aspect predictions over the full review text.
    asppredictions = senti_pipeline(review)
    # Emotion predictions per sentence; skip the call when there is nothing to
    # classify (e.g. the review is only punctuation).
    emopredictions = classifier(sentences) if sentences else []

    # Build aspects, sentiments and scores in a single pass so the three lists
    # stay index-aligned after short and duplicate aspects are dropped.
    refined_aspects = []
    sentiments = []
    scores = []
    seen = set()
    for pred in asppredictions:
        full_word = extract_full_word(review, pred["start"], pred["end"])
        if len(full_word) < 3 or full_word in seen:
            continue
        seen.add(full_word)
        refined_aspects.append(full_word)
        sentiments.append(pred["entity_group"])
        scores.append(f"{pred['score']:.4f}")

    # Keep only the emotions the classifier is reasonably confident about.
    flat_preds = [
        pred
        for sentence_preds in emopredictions
        for pred in sentence_preds
        if pred["score"] >= 0.5
    ]

    return {
        "review": review,
        "aspect_words": refined_aspects,
        "sentiment": sentiments,
        "score": scores,
        "emotions": [pred["label"] for pred in flat_preds],
        "emo-score": [pred["score"] for pred in flat_preds],
    }
def highlight_aspects(text, aspects):
    """Return ``text`` as HTML with every aspect term wrapped in a highlight span.

    Matching is case-insensitive and whole-term only (a term is not matched
    inside a longer word). Longer terms are tried first so that a multi-word
    aspect wins over a shorter aspect it contains.

    Args:
        text: Raw (untrusted) review text.
        aspects: Iterable of aspect strings to highlight; empty strings are
            ignored.

    Returns:
        An HTML string. All text, matched or not, is HTML-escaped; the only
        markup in the result is the ``<span>`` wrappers added here.
    """
    # Drop empty terms: an empty alternative would match at every position.
    terms = sorted((asp for asp in aspects if asp), key=len, reverse=True)
    if not terms:
        # With nothing to highlight, an empty alternation would otherwise
        # insert an empty span at every word boundary.
        return html.escape(text)

    # Lookarounds instead of \b so terms that begin or end with a non-word
    # character (e.g. "c++") are still matched as whole terms.
    pattern = re.compile(
        r'(?<!\w)(?:' + '|'.join(re.escape(term) for term in terms) + r')(?!\w)',
        flags=re.IGNORECASE,
    )

    # Match against the raw text, then escape each piece as it is emitted, so
    # escaping cannot interfere with matching and user text never reaches the
    # page as markup.
    pieces = []
    cursor = 0
    for match in pattern.finditer(text):
        pieces.append(html.escape(text[cursor:match.start()]))
        pieces.append(
            '<span style="font-weight: bold; background-color: yellow;">'
            f'{html.escape(match.group())}</span>'
        )
        cursor = match.end()
    pieces.append(html.escape(text[cursor:]))
    return ''.join(pieces)
def _cell_grid(items):
    """Render strings as a ``cell-grid`` div with one escaped ``cell-item`` each."""
    cells = "".join(f'<div class="cell-item">{html.escape(item)}</div>' for item in items)
    return '<div class="cell-grid">' + cells + '</div>'


def format_review(review):
    """Build the HTML card for an analysed review.

    Args:
        review: A list whose first element is a dict with the keys "review",
            "aspect_words", "sentiment" and "emotions". Only the first element
            is rendered.

    Returns:
        An HTML string with the review text (aspects highlighted by
        ``highlight_aspects``) followed by grids of aspect words, sentiments
        and emotions. Returns an empty string when ``review`` is empty.
        The "score" and "emo-score" entries are not rendered.
    """
    if not review:
        return ""
    entry = review[0]

    highlighted_text = highlight_aspects(entry['review'], entry['aspect_words'])
    aspects_html = _cell_grid(asp.strip() for asp in entry['aspect_words'])
    sentiments_html = _cell_grid(entry['sentiment'])
    emotions_html = _cell_grid(emo.strip() for emo in entry['emotions'])

    return f"""
    <div style="border: 1px solid #ccc; padding: 10px; margin-bottom: 10px;">
    <h4>Review</h4>
    <p>{highlighted_text}</p>
    <h3>Aspect Words</h3>
    {aspects_html}
    <h3>Sentiments</h3>
    {sentiments_html}
    <h3>Emotions</h3>
    {emotions_html}
    </div>
    """
def submit_new_review(text):
    """Handle the Submit button: analyse the review and render it.

    Args:
        text: Contents of the review textbox; may be empty or ``None``.

    Returns:
        A ``(html, reviews)`` tuple. For blank input this is the prompt
        ``"Please enter a review."`` and an empty list; otherwise it is the
        rendered HTML card and a one-element list holding the analysis dict.
    """
    # Treat None the same as blank so a missing value cannot crash on .strip().
    if not text or not text.strip():
        return "Please enter a review.", []
    new_dynamic_reviews = [extract_full_analysis(text)]
    return format_review(new_dynamic_reviews), new_dynamic_reviews
# Styles for the "cell-grid" / "cell-item" classes used in the result HTML.
CELL_GRID_CSS = """
.cell-grid {
    display: grid;
    grid-template-columns: repeat(6, 1fr);
    gap: 10px;
}
.cell-item {
    background-color: #f0f0f0;
    padding: 10px;
    border: 1px solid #ccc;
    text-align: center;
}
"""

# NOTE(review): the original indentation of this section was lost, so the
# nesting below is reconstructed. The textbox is placed in the scale=8 column
# and the button beside it in the same row — confirm against the intended layout.
with gr.Blocks(css=CELL_GRID_CSS) as demo:
    # Header
    gr.Markdown("# Yelp Review Demonstration for Aspect and Emotion Extracted")

    # Submit form
    gr.Markdown("## Submit Your Review")
    with gr.Row():
        with gr.Column(scale=8):
            submit_text = gr.Textbox(label="Write your review", lines=5)
        submit_button = gr.Button("Submit")

    # Rendered result of the most recent submission
    gr.Markdown("## User Submitted Reviews")
    dynamic_display = gr.HTML()

    # Holds the analysis list returned by submit_new_review
    dynamic_reviews_state = gr.State()

    # Event wiring must happen inside the Blocks context.
    submit_button.click(
        submit_new_review,
        inputs=submit_text,
        outputs=[dynamic_display, dynamic_reviews_state],
    )

# Launch
if __name__ == "__main__":
    demo.launch()
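# SECURITY: an access token was committed on this line and has been redacted.
# Revoke that token and supply credentials through environment variables or
# Space secrets instead of source code.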