|
from transformers import pipeline |
|
import matplotlib.pyplot as plt |
|
import streamlit as st |
|
import langid |
|
import pandas as pd |
|
from difflib import SequenceMatcher |
|
import random |
|
|
|
def calculate_similarity(a, b):
    """Return the similarity ratio between sequences *a* and *b* (0.0-1.0)."""
    matcher = SequenceMatcher(None, a, b)
    return matcher.ratio()
|
|
|
def filter_similar_items(items, similarity_threshold):
    """Greedily drop items that are too similar to an already-kept item.

    Args:
        items: iterable of strings to deduplicate (original order preserved).
        similarity_threshold: an item is kept only if its SequenceMatcher
            ratio against every previously kept item is <= this value.

    Returns:
        list: the retained items, in their original order.
    """
    # NOTE(review): the parameter was named `list`, shadowing the builtin;
    # renamed (the only caller in this file passes it positionally).
    filtered_data = []
    for item in items:
        # O(n^2) pairwise comparison -- acceptable for the small samples
        # this app processes. all() short-circuits like the original break.
        if all(calculate_similarity(item, kept) <= similarity_threshold
               for kept in filtered_data):
            filtered_data.append(item)
    return filtered_data
|
|
|
def process_data(input_data, columnname='text', num_data=100):
    """Randomly sample reviews from one CSV column and drop near-duplicates.

    Args:
        input_data: pandas DataFrame holding the uploaded CSV.
        columnname: column containing the review text.
        num_data: number of rows to sample (clamped to the available rows).

    Returns:
        list: the sampled, deduplicated review strings.
    """
    # Fixed seed so the same upload always yields the same sample.
    random.seed(20979738)
    processed_data = list(input_data[columnname])
    # Clamp the request: random.sample raises ValueError when asked for
    # more items than the population holds (the original crashed on small
    # files with the default num_data=100).
    sample_size = min(num_data, len(processed_data))
    random_selection = random.sample(processed_data, sample_size)
    filtered_data = filter_similar_items(random_selection, similarity_threshold=0.5)
    st.write('The Number of Data You Input: ', len(random_selection))
    st.write('After Removing Duplicates: ', len(filtered_data))
    return filtered_data
|
|
|
def chi2eng(filtered_data):
    """Translate Chinese reviews to English, or pass English input through.

    Language is detected from the first item only -- assumes the batch is
    monolingual (TODO confirm with upstream data).

    Args:
        filtered_data: list of review strings.

    Returns:
        list: English review strings; empty when the input is empty or the
        detected language is neither Chinese nor English.
    """
    translated_data = []
    # Guard: the original indexed filtered_data[0] and crashed on [].
    if not filtered_data:
        return translated_data
    language_classification = langid.classify(filtered_data[0])[0]
    if language_classification == "zh":
        st.write("Your input is Chinese, Translating to English")
        # Load the translation model only when it is actually needed.
        trans_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-zh-en")
        for text in filtered_data:
            # Run the (expensive) model once per item; the original invoked
            # the pipeline twice per review -- once to display, once to store.
            translation = trans_pipe(text)[0]['translation_text']
            st.write(translation)
            translated_data.append(translation)
    elif language_classification == 'en':
        st.write("Your input is English, Moving to Next Stage")
        translated_data = list(filtered_data)
    else:
        # Typo fixed in the user-facing message ("anguage" -> "language");
        # reuse the already-computed classification instead of re-detecting.
        st.write('The language you input is: ', language_classification, 'the program cannot process')
    return translated_data
|
|
|
|
|
def emotion_classification(translated_data):
    """Classify review sentiment, render counts and a pie chart, and return
    the most-negative comments concatenated for summarisation.

    Args:
        translated_data: list of English review strings.

    Returns:
        str: up to 10 highest-score negative comments joined with '.'
        (empty string when there are no negative reviews).
    """
    emo_pipe = pipeline("text-classification", model="deeplearningwithpython5240/twitter_roberta_base_sentiment_fintune_with_app_reviews")
    negative_count, neutral_count, positive_count = 0, 0, 0
    negative_dict = {}
    for text in translated_data:
        # Run the model ONCE per review and reuse the result -- the original
        # invoked the pipeline a second time just to read the score.
        result = emo_pipe(text)[0]
        label = result['label']
        if label == 'negative':
            negative_dict[text] = result['score']
            negative_count += 1
        elif label == 'neutral':
            neutral_count += 1
        elif label == 'positive':
            positive_count += 1

    sizes = [negative_count, neutral_count, positive_count]
    labels = ['negative_review', 'neutral_review', 'positive_review']

    st.write('Number of Positive Reviews: ', positive_count)
    st.write('Number of Neutral Reviews: ', neutral_count)
    st.write('Number of Negative Reviews: ', negative_count)
    fig = plt.figure(figsize=(5, 5))
    plt.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90)
    # Pass the figure explicitly: plt.show() returns None, so the original
    # relied on Streamlit's deprecated global-figure fallback.
    st.pyplot(fig)

    # Sort negative comments by confidence score, highest first.
    negative_sorted = sorted(negative_dict.items(), key=lambda kv: kv[1], reverse=True)
    if len(negative_sorted) < 10:
        # Typo fixed in the user-facing message ("Totall" -> "Total").
        st.write("Total Number of Negative Comments: ", len(negative_sorted))
        selected = negative_sorted
    else:
        st.write("Top 10 Negative Comments")
        selected = negative_sorted[:10]

    top10_negative_str = ""
    for comment, _score in selected:
        st.write(comment)
        top10_negative_str += f"{comment}."
    return top10_negative_str
|
|
|
|
|
def summarization(top10_negative_str):
    """Run the fine-tuned T5 model over the concatenated negative comments.

    Args:
        top10_negative_str: negative comments joined into a single string.

    Returns:
        The raw pipeline output (a list of generation dicts).
    """
    summarizer = pipeline(
        "text2text-generation",
        model="deeplearningwithpython5240/summarisation-t5-finetuned-model",
        max_new_tokens=512,
    )
    return summarizer(top10_negative_str)
|
|
|
def main():
    """Streamlit entry point: upload CSV -> sample/dedupe -> translate ->
    sentiment classification -> summarise negative feedback."""
    # set_page_config must be the FIRST Streamlit command on the page;
    # the original called st.set_option before it, which Streamlit rejects.
    st.set_page_config(page_title="Review Sentiment Analysis and Improvement Summarisation Report for Business Product", page_icon="🦜")
    st.set_option('deprecation.showPyplotGlobalUse', False)
    st.header("Review Sentiment Analysis and Improvement Summarisation Report for Business Product")
    uploaded_file = st.file_uploader("🔶 Upload CSV file for analysis 🔶", type={"csv"})
    columnname = st.text_input("🔶 Please enter the column name in CSV file you want to analyze 🔶")
    num_data = st.number_input("🔶 Please enter the number of rows you want to process 🔶", step=1)

    # Nothing to do until a file is uploaded. The original called
    # pd.read_csv(uploaded_file) unconditionally and crashed on the first
    # render, before any file existed; its `is not None` guard only
    # covered the seek(0), not the pipeline below.
    if uploaded_file is None:
        return

    input_data = pd.read_csv(uploaded_file)
    st.dataframe(input_data)
    st.text('️️ ')

    st.text('🔶 Processing Data 🔶')
    processed_data = process_data(input_data, columnname, int(num_data))
    st.write(processed_data)
    st.text('️️🟢 Processing Data Finished 🟢')
    st.text('️️ ')

    st.text('🔶 Checking Translation is Needed or Not 🔶')
    translated_data = chi2eng(processed_data)
    st.write(translated_data)
    st.text('️️🟢 Translation Finished 🟢')
    st.text('️️ ')

    # Typo fixed in the status text ("Calssification" -> "Classification").
    st.text('️️🔶 Processing Emotion Classification 🔶')
    top10_negative_str = emotion_classification(translated_data)
    st.text('️️🟢 Emotion Classification Finished 🟢')
    st.text('️️ ')

    st.text('🔶 Processing Summarization 🔶')
    summarized_text = summarization(top10_negative_str)
    st.write(summarized_text)
    st.text('️️🟢 Summarization Finished 🟢')


if __name__ == "__main__":
    main()