deeplearningwithpython5240 committed on
Commit
1a630df
·
verified ·
1 Parent(s): d2f8699

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +137 -0
app.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+ import matplotlib.pyplot as plt
3
+ import streamlit as st
4
+ import langid
5
+ import pandas as pd
6
+ from difflib import SequenceMatcher
7
+ import random
8
+
9
def calculate_similarity(a, b):
    """Return the difflib similarity ratio between ``a`` and ``b``.

    The score comes from ``SequenceMatcher.ratio()``: a float between 0 and
    1, where 1.0 means the two sequences are identical.
    """
    matcher = SequenceMatcher(isjunk=None, a=a, b=b)
    return matcher.ratio()
11
+
12
def filter_similar_items(list, similarity_threshold):
    """Greedily drop items that are too similar to an already-kept item.

    Items are visited in order. An item is kept only if its similarity
    (via ``calculate_similarity``) to every previously kept item is not
    greater than ``similarity_threshold``.

    Args:
        list: Iterable of items to de-duplicate (name kept for
            compatibility with existing callers).
        similarity_threshold: Similarity above which an item counts as a
            near-duplicate of a kept one.

    Returns:
        A new list holding the kept items in their original order.
    """
    kept = []
    for candidate in list:
        # any() stops at the first kept item that is too close.
        too_close = any(
            calculate_similarity(candidate, earlier) > similarity_threshold
            for earlier in kept
        )
        if too_close:
            continue
        kept.append(candidate)
    return kept
24
+
25
def process_data(input_data,columnname = 'text', num_data = 100):
    """Sample rows from one column and remove near-duplicate entries.

    Args:
        input_data: Table-like object indexable by ``columnname`` (the
            caller passes the DataFrame read from the uploaded CSV).
        columnname: Name of the column whose values are analysed.
        num_data: Requested number of rows to sample. Values larger than
            the number of available rows are reduced to that number, and
            negative values are treated as 0.

    Returns:
        The sampled values with near-duplicates (similarity above 0.5)
        removed by ``filter_similar_items``.
    """
    # Fixed seed so the same upload always yields the same sample.
    random.seed(20979738)
    processed_data = list(input_data[columnname])
    # random.sample raises ValueError when asked for more items than exist
    # (or for a negative count), so clamp the request to the valid range.
    sample_size = max(0, min(num_data, len(processed_data)))
    random_selection = random.sample(processed_data, sample_size)
    filtered_data = filter_similar_items(random_selection, similarity_threshold = 0.5)
    st.write('The Number of Data You Input: ',len(random_selection))
    st.write('After Removing Duplicates: ',len(filtered_data))
    return filtered_data
33
+
34
def chi2eng(filtered_data):
    """Translate Chinese texts to English; pass English texts through.

    The language is detected with ``langid`` from the first item only and
    is assumed to hold for the whole list.

    Args:
        filtered_data: List of text strings.

    Returns:
        A list of English texts: translations for Chinese input, a copy of
        the input for English input, and an empty list when the input is
        empty or its language is neither Chinese nor English.
    """
    # Nothing to detect or translate; also avoids IndexError on [0] below.
    if not filtered_data:
        return []

    language_classification = langid.classify(filtered_data[0])[0]

    if language_classification == "zh":
        st.write("Your input is Chinese, Translating to English")
        # Load the translation model only when it is actually needed.
        trans_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-zh-en")
        translated_data = []
        for text in filtered_data:
            # Translate once and reuse the result for display and output.
            translation = trans_pipe(text)[0]['translation_text']
            st.write(translation)
            translated_data.append(translation)
        return translated_data

    if language_classification == 'en':
        st.write("Your input is English, Moving to Next Stage")
        return list(filtered_data)

    st.write('The language you input is: ',language_classification,'the program cannot process')
    return []
49
+
50
+ # Text Classification:Negative/Neutral/Positive
51
def emotion_classification(translated_data):
    """Classify each review's sentiment and report the negative ones.

    Runs the fine-tuned sentiment model over every text, writes the
    per-label counts and a pie chart to the Streamlit page, then lists up
    to ten negative reviews ordered by descending model score.

    Args:
        translated_data: Iterable of English review texts.

    Returns:
        The listed negative reviews concatenated into one string, each
        followed by a ``.``; an empty string if there were none.
    """
    emo_pipe = pipeline("text-classification", model="deeplearningwithpython5240/twitter_roberta_base_sentiment_fintune_with_app_reviews")
    negative_count, neutral_count, positive_count = 0, 0, 0
    # Maps negative review text -> model score; identical texts share a key.
    negative_scores = {}
    for text in translated_data:
        # Run the model once per text and reuse both label and score.
        prediction = emo_pipe(text)[0]
        label = prediction['label']
        if label == 'negative':
            negative_scores[text] = prediction['score']
            negative_count += 1
        elif label == 'neutral':
            neutral_count += 1
        elif label == 'positive':
            positive_count += 1

    sizes = [negative_count, neutral_count, positive_count]
    labels = ['negative_review', 'neutral_review', 'positive_review']
    st.write('Number of Positive Reviews: ', positive_count)
    st.write('Number of Neutral Reviews: ', neutral_count)
    st.write('Number of Negative Reviews: ', negative_count)

    # Draw the pie chart. A pie whose sizes are all zero is undefined
    # (normalising divides by the zero total), so skip it in that case.
    if sum(sizes) > 0:
        fig, ax = plt.subplots(figsize=(5, 5))  # set the chart size
        ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90)
        # Pass the figure explicitly: plt.show() returns None, which made
        # st.pyplot fall back to the deprecated global figure.
        st.pyplot(fig)
        plt.close(fig)

    ranked = sorted(negative_scores.items(), key=lambda x: x[1], reverse=True)
    if len(ranked) < 10:
        st.write("Total Number of Negative Comments: ",len(ranked))
    else:
        st.write("Top 10 Negative Comments")

    top_negative = [text for text, _score in ranked[:10]]
    for text in top_negative:
        st.write(text)
    return "".join(f"{text}." for text in top_negative)
91
+
92
+ # Summarization
93
def summarization(top10_negative_str):
    """Summarise the concatenated negative reviews with the fine-tuned T5 model.

    Args:
        top10_negative_str: Text to summarise.

    Returns:
        Whatever the ``text2text-generation`` pipeline returns for the
        input, unmodified.
    """
    generator = pipeline(
        "text2text-generation",
        model="deeplearningwithpython5240/summarisation-t5-finetuned-model",
        max_new_tokens=512,
    )
    return generator(top10_negative_str)
97
+
98
def main():
    """Run the Streamlit app: upload, sample, translate, classify, summarise.

    The page collects a CSV file, a column name and a row count, then runs
    four stages in order: ``process_data``, ``chi2eng``,
    ``emotion_classification`` and ``summarization``. The pipeline stops
    early, with a message, while any input is missing or invalid.
    """
    # Silences Streamlit's warning about st.pyplot() using the global figure.
    st.set_option('deprecation.showPyplotGlobalUse', False)
    st.set_page_config(page_title="Review Sentiment Analysis and Improvement Summarisation Report for Business Product", page_icon="🦜")
    st.header("Review Sentiment Analysis and Improvement Summarisation Report for Business Product")
    uploaded_file = st.file_uploader("🔶 Upload CSV file for analysis 🔶", type={"csv"})
    columnname = st.text_input("🔶 Please enter the column name in CSV file you want to analyze 🔶")
    num_data = st.number_input("🔶 Please enter the number of rows you want to process 🔶",step=1)

    # The uploader returns None until a file is chosen; reading it before
    # this check crashed the app on first load.
    if uploaded_file is None:
        return

    input_data = pd.read_csv(uploaded_file)
    st.dataframe(input_data)
    st.text('️️ ')
    uploaded_file.seek(0)

    # Both inputs start out empty/zero, so wait until they are usable
    # instead of failing inside the processing stages.
    if columnname not in input_data.columns:
        st.write('Please enter a column name that exists in the uploaded CSV file.')
        return
    if int(num_data) < 1:
        st.write('Please enter a number of rows greater than 0.')
        return

    #stage 1:process data
    st.text('🔶 Processing Data 🔶')
    processed_data = process_data(input_data ,columnname, int(num_data))
    st.write(processed_data)
    st.text('️️🟢 Processing Data Finished 🟢')
    st.text('️️ ')

    #stage 2:translate
    st.text('🔶 Checking Translation is Needed or Not 🔶')
    translated_data = chi2eng(processed_data)
    st.write(translated_data)
    st.text('️️🟢 Translation Finished 🟢')
    st.text('️️ ')

    # Nothing to classify or summarise (e.g. unsupported language).
    if not translated_data:
        st.write('No text available for sentiment analysis.')
        return

    #stage 3:emotion Classification
    st.text('️️🔶 Processing Emotion Classification 🔶')
    top10_negative_str = emotion_classification(translated_data)
    st.text('️️🟢 Emotion Classification Finished 🟢')
    st.text('️️ ')

    #stage 4:Summarization
    st.text('🔶 Processing Summarization 🔶')
    summarized_text = summarization(top10_negative_str)
    st.write(summarized_text)
    st.text('️️🟢 Summarization Finished 🟢')


if __name__ == "__main__":
    main()