File size: 6,465 Bytes
1a630df 3ba8d69 1a630df 3ba8d69 1a630df 3ba8d69 1a630df 3ba8d69 1a630df 3ba8d69 1a630df 901b59e 1a630df 3ba8d69 1a630df 3ba8d69 1a630df 3e5ab07 a606c00 1a630df 3ba8d69 1a630df 3ba8d69 1a630df a606c00 3ba8d69 a606c00 1a630df |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
from transformers import pipeline
import matplotlib.pyplot as plt
import streamlit as st
import langid
import pandas as pd
from difflib import SequenceMatcher
import random
def calculate_similarity(a, b):
    """Return the similarity ratio between strings *a* and *b*, in [0.0, 1.0]."""
    matcher = SequenceMatcher(None, a, b)
    return matcher.ratio()
def filter_similar_items(items, similarity_threshold):
    """Remove near-duplicate strings from *items*, keeping first occurrences.

    An item is dropped when its similarity to any already-kept item exceeds
    *similarity_threshold*.

    Args:
        items: iterable of strings to de-duplicate.  (Renamed from ``list``,
            which shadowed the builtin; the call site passes it positionally.)
        similarity_threshold: ratio in [0, 1]; above it two items count as
            duplicates.

    Returns:
        list: the surviving items in their original order.
    """
    filtered_data = []
    for item in items:
        # Keep the item only if nothing already kept is too similar to it.
        is_duplicate = any(
            calculate_similarity(item, kept) > similarity_threshold
            for kept in filtered_data
        )
        if not is_duplicate:
            filtered_data.append(item)
    return filtered_data
def process_data(input_data, columnname='text', num_data=100):
    """Randomly sample texts from a DataFrame column and drop near-duplicates.

    Args:
        input_data: pandas DataFrame holding the raw reviews.
        columnname: name of the column to analyze (default 'text').
        num_data: number of rows to sample (default 100).  Clamped to the
            number of available rows — the original passed it straight to
            ``random.sample``, which raises ValueError when it exceeds the
            population size.

    Returns:
        list: sampled texts with near-duplicates removed.
    """
    random.seed(20979738)  # fixed seed so the sample is reproducible across runs
    processed_data = list(input_data[columnname])
    # Guard against requesting more rows than the CSV provides.
    sample_size = min(num_data, len(processed_data))
    random_selection = random.sample(processed_data, sample_size)
    filtered_data = filter_similar_items(random_selection, similarity_threshold=0.5)
    st.write('Number of data input: ', len(random_selection))
    st.write('After removing duplicates: ', len(filtered_data))
    return filtered_data
def chi2eng(filtered_data):
    """Translate Chinese review texts to English; pass English texts through.

    The language is detected from the first item only, so the whole batch is
    assumed to be in a single language.

    Args:
        filtered_data: list of review strings.

    Returns:
        list: English texts.  Empty when the input is empty or the detected
        language is neither 'zh' nor 'en'.
    """
    translated_data = []
    # Guard: the original indexed filtered_data[0] unconditionally and raised
    # IndexError on an empty batch.
    if not filtered_data:
        return translated_data
    language_classification = langid.classify(filtered_data[0])[0]
    if language_classification == "zh":
        st.write("Your input is Chinese, translating to English")
        st.write('▶️ Translation model start downing, loading model may takes time, please wait...')
        trans_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-zh-en")
        st.write('⏺️ Translation model successfully loaded')
        for text in filtered_data:
            # Run the (expensive) pipeline once per item — the original called
            # it twice, translating every review two times.
            translated = trans_pipe(text)[0]['translation_text']
            st.write(translated)
            translated_data.append(translated)
    elif language_classification == 'en':
        st.write("Your input is English, moving to next stage...")
        translated_data = list(filtered_data)
    else:
        # Typo fixed in the user-facing message ('anguage' -> 'language');
        # reuse the classification already computed instead of re-classifying.
        st.write('The language you input is: ', language_classification, 'the program cannot process')
    return translated_data
# Text Classification:Negative/Neutral/Positive
# Text Classification: Negative/Neutral/Positive
def emotion_classification(translated_data):
    """Classify reviews as negative/neutral/positive, plot the distribution,
    and collect the strongest negative reviews for summarization.

    Args:
        translated_data: list of English review strings.

    Returns:
        str: up to the 10 highest-scoring negative reviews concatenated,
        each terminated with '.'.
    """
    st.write('▶️ Classification model start downing, loading model may takes time, please wait...')
    emo_pipe = pipeline("text-classification", model="deeplearningwithpython5240/twitter_roberta_base_sentiment_fintune_with_app_reviews")
    st.write('⏺️ Classification model successfully loaded')
    negative_count, neutral_count, positive_count = 0, 0, 0
    negative_dict = {}
    for text in translated_data:
        # Run the (expensive) pipeline once per text — the original invoked it
        # a second time just to read the score of negative items.
        result = emo_pipe(text)[0]
        label = result['label']
        st.write('Text: ', text)
        st.write('Label: ', label)
        st.write(' ')
        if label == 'negative':
            negative_dict[text] = result['score']
            negative_count += 1
        elif label == 'neutral':
            neutral_count += 1
        elif label == 'positive':
            positive_count += 1
    sizes = [negative_count, neutral_count, positive_count]
    labels = ['negative_review', 'neutral_review', 'positive_review']
    st.write('Number of Positive Reviews: ', positive_count)
    st.write('Number of Neutral Reviews: ', neutral_count)
    st.write('Number of Negative Reviews: ', negative_count)
    # Draw the sentiment distribution as a pie chart and hand the figure to
    # Streamlit explicitly — st.pyplot(plt.show()) passed None, relying on the
    # deprecated global-figure fallback.
    fig = plt.figure(figsize=(5, 5))
    plt.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90)
    st.pyplot(fig)
    # Highest-confidence negative reviews first.
    negatives_sorted = sorted(negative_dict.items(), key=lambda kv: kv[1], reverse=True)
    if len(negatives_sorted) < 10:
        # Typo fixed in the user-facing message ('Totall' -> 'Total').
        st.write("Total Number of Negative Comments: ", len(negatives_sorted))
        selected = negatives_sorted
    else:
        st.write("Top 10 Negative Comments")
        selected = negatives_sorted[:10]
    top10_negative_str = ""
    for comment, _score in selected:
        st.write(comment)
        top10_negative_str += f"{comment}."
    return top10_negative_str
# Summarization
# Summarization
def summarization(top10_negative_str):
    """Summarize the concatenated negative reviews with a fine-tuned T5 model.

    Args:
        top10_negative_str: negative reviews joined into one '.'-separated string.

    Returns:
        The pipeline output: a list containing one dict with the generated text.
    """
    # Typo fixed in the user-facing message ('Summarizatio' -> 'Summarization').
    st.write('▶️ Summarization model start downing, loading model may takes time, please wait...')
    summarize_pipe = pipeline("text2text-generation", model="deeplearningwithpython5240/summarisation-t5-finetuned-model", max_new_tokens=512)
    st.write('⏺️ Summarization model successfully loaded')
    summarized_text = summarize_pipe(top10_negative_str)
    return summarized_text
def main():
    """Streamlit entry point.

    Pipeline: upload CSV -> sample & de-duplicate the chosen column ->
    translate to English if the input is Chinese -> sentiment classification
    with a pie-chart summary -> summarize the top negative reviews.
    """
    st.set_option('deprecation.showPyplotGlobalUse', False)
    st.set_page_config(page_title="Review Sentiment Analysis and Improvement Summarisation Report for Business Product", page_icon="🦜")
    st.header("Review Sentiment Analysis and Improvement Summarisation Report for Business Product")
    try:
        uploaded_file = st.file_uploader("🔶 Upload CSV file for analysis 🔶", type={"csv"})
        if uploaded_file is not None:
            columnname = st.text_input("🔶 Please enter the column name in CSV file you want to analyze 🔶")
            num_data = st.number_input("🔶 Please enter the number of rows you want to process 🔶", step=1)
            input_data = pd.read_csv(uploaded_file)
            st.dataframe(input_data)
            st.text('️️ ')
            # Stage 1: sample and de-duplicate the requested column.
            st.text('🔶 Processing Data 🔶')
            processed_data = process_data(input_data, columnname, int(num_data))
            st.write(processed_data)
            st.text('️️🟢 Processing Data Finished 🟢')
            st.text('️️ ')
            # Stage 2: translate to English when the reviews are Chinese.
            st.text('🔶 Checking Translation is Needed or Not 🔶')
            translated_data = chi2eng(processed_data)
            st.write(translated_data)
            st.text('️️🟢 Translation Finished 🟢')
            st.text('️️ ')
            # Stage 3: sentiment classification and negative-review selection.
            st.text('️️🔶 Processing Emotion Classification 🔶')
            top10_negative_str = emotion_classification(translated_data)
            st.text('️️🟢 Emotion Classification Finished 🟢')
            st.text('️️ ')
            # Stage 4: summarize the top negative reviews.
            st.text('🔶 Processing Summarization 🔶')
            summarized_text = summarization(top10_negative_str)
            st.write(summarized_text)
            st.text('️️🟢 Summarization Finished 🟢')
    except Exception as exc:
        # Surface failures instead of silently swallowing them — the original
        # bare `except: st.write("")` hid every error (bad column name,
        # malformed CSV, model download failures) from the user.
        st.error(f"Processing failed: {exc}")
# Script entry point; removed the stray trailing '|' artifact that made the
# final line a syntax error as presented.
if __name__ == "__main__":
    main()