Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import pipeline
|
2 |
+
import matplotlib.pyplot as plt
|
3 |
+
import streamlit as st
|
4 |
+
import langid
|
5 |
+
import pandas as pd
|
6 |
+
from difflib import SequenceMatcher
|
7 |
+
import random
|
8 |
+
|
9 |
+
def calculate_similarity(a, b):
    """Return the difflib similarity ratio between strings *a* and *b* (0.0-1.0)."""
    return SequenceMatcher(None, a, b).ratio()

def filter_similar_items(items, similarity_threshold):
    """Greedily drop near-duplicate strings.

    Walks *items* in order and keeps an item only if its similarity to every
    already-kept item is <= *similarity_threshold*.

    Args:
        items: iterable of strings to de-duplicate.  (Renamed from ``list``,
            which shadowed the builtin.)
        similarity_threshold: ratio above which two items count as duplicates.

    Returns:
        A new list of the retained items, original order preserved.
    """
    filtered_data = []
    for item in items:
        # O(n^2) pairwise scan — acceptable for the ~100-item samples used here.
        is_similar = any(
            calculate_similarity(item, saved_item) > similarity_threshold
            for saved_item in filtered_data
        )
        if not is_similar:
            filtered_data.append(item)
    return filtered_data
|
24 |
+
|
25 |
+
def process_data(input_data, columnname='text', num_data=100):
    """Sample reviews from one DataFrame column and remove near-duplicates.

    Args:
        input_data: pandas DataFrame holding the uploaded CSV.
        columnname: column to analyse (default ``'text'``).
        num_data: number of rows to sample (default 100).

    Returns:
        List of sampled strings with near-duplicates (ratio > 0.5) removed.
    """
    random.seed(20979738)  # fixed seed so the sample is reproducible across reruns
    processed_data = [i for i in input_data[columnname]]
    # Clamp the sample size: random.sample raises ValueError when asked for
    # more items than the population contains.
    sample_size = min(num_data, len(processed_data))
    random_selection = random.sample(processed_data, sample_size)
    filtered_data = filter_similar_items(random_selection, similarity_threshold=0.5)
    st.write('The Number of Data You Input: ', len(random_selection))
    st.write('After Removing Duplicates: ', len(filtered_data))
    return filtered_data
|
33 |
+
|
34 |
+
def chi2eng(filtered_data):
    """Translate Chinese reviews to English; pass English reviews through.

    Language is detected from the FIRST item only and assumed uniform for the
    whole batch.  Unsupported languages produce an empty result list.

    Args:
        filtered_data: non-empty list of review strings.

    Returns:
        List of English strings (translated or passed through); empty list for
        unsupported languages.
    """
    translated_data = []
    language_classification = langid.classify(filtered_data[0])[0]
    if language_classification == "zh":
        # Load the translation model only when it is actually needed —
        # the original constructed it unconditionally, even for English input.
        trans_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-zh-en")
        st.write("Your input is Chinese, Translating to English")
        for i in filtered_data:
            # Translate once and reuse; the original ran the pipeline twice per item.
            translated = trans_pipe(i)[0]['translation_text']
            st.write(translated)
            translated_data.append(translated)
    elif language_classification == 'en':
        st.write("Your input is English, Moving to Next Stage")
        translated_data = list(filtered_data)
    else:
        # Fixed typo in the user-facing message ("anguage" -> "language").
        st.write('The language you input is: ', language_classification, 'the program cannot process')
    return translated_data
|
49 |
+
|
50 |
+
# Text Classification:Negative/Neutral/Positive
|
51 |
+
def emotion_classification(translated_data):
    """Classify review sentiment, plot a pie chart, and collect top negatives.

    Runs a fine-tuned RoBERTa sentiment model over each review, renders a
    negative/neutral/positive pie chart via Streamlit, then lists the most
    confidently negative reviews (all of them if fewer than 10, else the top 10).

    Args:
        translated_data: list of English review strings.

    Returns:
        A single string of the selected negative reviews joined with '.'
        separators, fed to the summarisation stage.
    """
    emo_pipe = pipeline("text-classification", model="deeplearningwithpython5240/twitter_roberta_base_sentiment_fintune_with_app_reviews")
    negative_count, neutral_count, positive_count = 0, 0, 0
    negative_dict = {}
    for i in translated_data:
        # Classify once and reuse label and score; the original invoked the
        # pipeline a second time just to fetch the score.
        result = emo_pipe(i)[0]
        label = result['label']
        if label == 'negative':
            negative_dict[i] = result['score']
            negative_count += 1
        elif label == 'neutral':
            neutral_count += 1
        elif label == 'positive':
            positive_count += 1
    sizes = [negative_count, neutral_count, positive_count]
    labels = ['negative_review', 'neutral_review', 'positive_review']
    # Build the pie chart (comment translated from Chinese)
    st.write('Number of Positive Reviews: ', positive_count)
    st.write('Number of Neutral Reviews: ', neutral_count)
    st.write('Number of Negative Reviews: ', negative_count)
    plt.figure(figsize=(5, 5))  # chart size
    plt.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90)
    # Pass the current figure explicitly: plt.show() returns None, so the
    # original relied on the deprecated "global pyplot use" fallback.
    st.pyplot(plt.gcf())
    negative_dict_sorted = dict(sorted(negative_dict.items(), key=lambda x: x[1], reverse=True))
    top10_negative_str = ""
    if len(negative_dict_sorted) < 10:
        # Fixed typo in the user-facing message ("Totall" -> "Total").
        st.write("Total Number of Negative Comments: ", len(negative_dict_sorted))
        for k in negative_dict_sorted:
            st.write(k)
            top10_negative_str += f"{k}."
    else:
        st.write("Top 10 Negative Comments")
        for k in list(negative_dict_sorted)[:10]:
            st.write(k)
            top10_negative_str += f"{k}."
    return top10_negative_str
|
91 |
+
|
92 |
+
# Summarization
|
93 |
+
def summarization(top10_negative_str):
|
94 |
+
summarize_pipe = pipeline("text2text-generation", model="deeplearningwithpython5240/summarisation-t5-finetuned-model", max_new_tokens =512)
|
95 |
+
summarized_text = summarize_pipe(top10_negative_str)
|
96 |
+
return summarized_text
|
97 |
+
|
98 |
+
def main():
    """Streamlit entry point: upload CSV -> clean -> translate -> classify -> summarise."""
    st.set_option('deprecation.showPyplotGlobalUse', False)
    st.set_page_config(page_title="Review Sentiment Analysis and Improvement Summarisation Report for Business Product", page_icon="🦜")
    st.header("Review Sentiment Analysis and Improvement Summarisation Report for Business Product")
    uploaded_file = st.file_uploader("🔶 Upload CSV file for analysis 🔶", type={"csv"})
    columnname = st.text_input("🔶 Please enter the column name in CSV file you want to analyze 🔶")
    num_data = st.number_input("🔶 Please enter the number of rows you want to process 🔶", step=1)
    if uploaded_file is not None:
        # Read the CSV only once a file exists.  The original called
        # pd.read_csv(uploaded_file) unconditionally before this check,
        # crashing the app on startup when no file was uploaded yet.
        input_data = pd.read_csv(uploaded_file)
        st.dataframe(input_data)
        st.text('️️ ')
        uploaded_file.seek(0)

        # stage 1: process data
        st.text('🔶 Processing Data 🔶')
        processed_data = process_data(input_data, columnname, int(num_data))
        st.write(processed_data)
        # Repaired mojibake in the closing marker (was "��").
        st.text('️️🟢 Processing Data Finished 🟢')
        st.text('️️ ')

        # stage 2: translate
        st.text('🔶 Checking Translation is Needed or Not 🔶')
        translated_data = chi2eng(processed_data)
        st.write(translated_data)
        st.text('️️🟢 Translation Finished 🟢')
        st.text('️️ ')

        # stage 3: emotion classification
        # Fixed "Calssification" typos in the user-facing stage messages.
        st.text('️️🔶 Processing Emotion Classification 🔶')
        top10_negative_str = emotion_classification(translated_data)
        st.text('️️🟢 Emotion Classification Finished 🟢')
        st.text('️️ ')

        # stage 4: summarization
        st.text('🔶 Processing Summarization 🔶')
        summarized_text = summarization(top10_negative_str)
        st.write(summarized_text)
        st.text('️️🟢 Summarization Finished 🟢')

if __name__ == "__main__":
    main()
|