# deeplearningwithpython5240's picture
# Create app.py
# 1a630df verified
# raw
# history blame
# 5.84 kB
from transformers import pipeline
import matplotlib.pyplot as plt
import streamlit as st
import langid
import pandas as pd
from difflib import SequenceMatcher
import random
def calculate_similarity(a, b):
    """Return the similarity ratio between sequences *a* and *b* (0.0 to 1.0)."""
    matcher = SequenceMatcher(None, a, b)
    return matcher.ratio()
def filter_similar_items(items, similarity_threshold):
    """Return *items* with near-duplicates removed.

    A greedy, order-preserving pass: each item is kept only if its
    similarity to every previously kept item is <= *similarity_threshold*.

    Note: the first parameter was renamed from ``list``, which shadowed
    the builtin of the same name.
    """
    filtered_data = []
    for item in items:
        # Keep the item only when it is not too similar to anything kept so far.
        if all(
            calculate_similarity(item, kept) <= similarity_threshold
            for kept in filtered_data
        ):
            filtered_data.append(item)
    return filtered_data
def process_data(input_data, columnname='text', num_data=100):
    """Randomly sample up to *num_data* texts from *input_data[columnname]* and dedupe them.

    Reports the sampled and deduplicated counts to the Streamlit page and
    returns the deduplicated list.
    """
    # Fixed seed so the sample is reproducible across reruns.
    random.seed(20979738)
    processed_data = [i for i in input_data[columnname]]
    # Clamp the sample size: random.sample raises ValueError when asked for
    # more items than the population contains.
    sample_size = min(num_data, len(processed_data))
    random_selection = random.sample(processed_data, sample_size)
    filtered_data = filter_similar_items(random_selection, similarity_threshold=0.5)
    st.write('The Number of Data You Input: ', len(random_selection))
    st.write('After Removing Duplicates: ', len(filtered_data))
    return filtered_data
def chi2eng(filtered_data):
    """Translate Chinese texts to English; pass English texts through unchanged.

    The language is detected from the FIRST item only and assumed uniform for
    the whole list. Returns the (possibly translated) list, or an empty list
    for unsupported languages.
    """
    trans_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-zh-en")
    translated_data = []
    language_classification = langid.classify(filtered_data[0])[0]
    if language_classification == "zh":
        st.write("Your input is Chinese, Translating to English")
        for text in filtered_data:
            # Run the translation model once per item; the original invoked
            # the pipeline twice (once to display, once to store).
            translated = trans_pipe(text)[0]['translation_text']
            st.write(translated)
            translated_data.append(translated)
    elif language_classification == 'en':
        st.write("Your input is English, Moving to Next Stage")
        translated_data = [i for i in filtered_data]
    else:
        # Fixed typo in the user-facing message: 'anguage' -> 'language';
        # reuse the already-computed classification instead of re-classifying.
        st.write('The language you input is: ', language_classification,
                 'the program cannot process')
    return translated_data
# Text Classification:Negative/Neutral/Positive
def emotion_classification(translated_data):
    """Classify each text as negative/neutral/positive, plot the distribution,
    and return the top (up to 10) negative texts concatenated into one string.

    Negative texts are ranked by classifier confidence, highest first.
    """
    emo_pipe = pipeline("text-classification", model="deeplearningwithpython5240/twitter_roberta_base_sentiment_fintune_with_app_reviews")
    negative_count, neutral_count, positive_count = 0, 0, 0
    negative_dict = {}
    for text in translated_data:
        # Run the classifier once per item and reuse the result; the original
        # invoked the pipeline twice for negative items.
        result = emo_pipe(text)[0]
        label = result['label']
        if label == 'negative':
            negative_dict[text] = result['score']
            negative_count += 1
        elif label == 'neutral':
            neutral_count += 1
        elif label == 'positive':
            positive_count += 1
    sizes = [negative_count, neutral_count, positive_count]
    labels = ['negative_review', 'neutral_review', 'positive_review']
    st.write('Number of Positive Reviews: ', positive_count)
    st.write('Number of Neutral Reviews: ', neutral_count)
    st.write('Number of Negative Reviews: ', negative_count)
    # Build the pie chart on an explicit figure and hand that figure to
    # Streamlit; the original passed plt.show(), which returns None.
    fig = plt.figure(figsize=(5, 5))
    plt.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90)
    st.pyplot(fig)
    # Sort negative texts by confidence score, highest first.
    negative_dict_sorted = dict(sorted(negative_dict.items(), key=lambda x: x[1], reverse=True))
    top10_negative_str = ""
    if len(negative_dict_sorted) < 10:
        # Fixed typo in the user-facing message: 'Totall' -> 'Total'.
        st.write("Total Number of Negative Comments: ", len(negative_dict_sorted))
        for comment in negative_dict_sorted:
            st.write(comment)
            top10_negative_str += f"{comment}."
    else:
        st.write("Top 10 Negative Comments")
        for count, comment in enumerate(negative_dict_sorted):
            if count >= 10:
                break
            st.write(comment)
            top10_negative_str += f"{comment}."
    return top10_negative_str
# Summarization
def summarization(top10_negative_str):
    """Generate an improvement summary from the concatenated negative reviews."""
    model_name = "deeplearningwithpython5240/summarisation-t5-finetuned-model"
    summarize_pipe = pipeline("text2text-generation", model=model_name, max_new_tokens=512)
    return summarize_pipe(top10_negative_str)
def main():
    """Streamlit entry point.

    Pipeline: upload a CSV of reviews -> sample & dedupe -> translate
    (zh -> en if needed) -> sentiment classification -> summarise the top
    negative reviews.
    """
    # st.set_page_config must be the FIRST Streamlit command on the page,
    # so it is moved ahead of st.set_option.
    st.set_page_config(page_title="Review Sentiment Analysis and Improvement Summarisation Report for Business Product", page_icon="🦜")
    st.set_option('deprecation.showPyplotGlobalUse', False)
    st.header("Review Sentiment Analysis and Improvement Summarisation Report for Business Product")
    # type expects a str or list of str; the original passed a set literal.
    uploaded_file = st.file_uploader("🔶 Upload CSV file for analysis 🔶", type=["csv"])
    columnname = st.text_input("🔶 Please enter the column name in CSV file you want to analyze 🔶")
    num_data = st.number_input("🔶 Please enter the number of rows you want to process 🔶", step=1)
    if uploaded_file is not None:
        # Read the CSV only after a file exists; the original called
        # pd.read_csv(uploaded_file) unconditionally, so the page crashed
        # with uploaded_file == None before any upload.
        input_data = pd.read_csv(uploaded_file)
        st.dataframe(input_data)
        st.text('️️ ')
        uploaded_file.seek(0)
        # Stage 1: process data
        st.text('🔶 Processing Data 🔶')
        processed_data = process_data(input_data, columnname, int(num_data))
        st.write(processed_data)
        st.text('️️🟢 Processing Data Finished 🟢')
        st.text('️️ ')
        # Stage 2: translate if needed
        st.text('🔶 Checking Translation is Needed or Not 🔶')
        translated_data = chi2eng(processed_data)
        st.write(translated_data)
        st.text('️️🟢 Translation Finished 🟢')
        st.text('️️ ')
        # Stage 3: emotion classification (fixed 'Calssification' typo)
        st.text('️️🔶 Processing Emotion Classification 🔶')
        top10_negative_str = emotion_classification(translated_data)
        st.text('️️🟢 Emotion Classification Finished 🟢')
        st.text('️️ ')
        # Stage 4: summarization
        st.text('🔶 Processing Summarization 🔶')
        summarized_text = summarization(top10_negative_str)
        st.write(summarized_text)
        st.text('️️🟢 Summarization Finished 🟢')
# Run the Streamlit app when this file is executed as a script.
if __name__ == "__main__":
    main()