from transformers import pipeline
import matplotlib.pyplot as plt
import streamlit as st
import langid
import pandas as pd
from difflib import SequenceMatcher
import random


def calculate_similarity(a, b):
    """Return the similarity ratio (0.0 .. 1.0) between strings *a* and *b*."""
    return SequenceMatcher(None, a, b).ratio()


def filter_similar_items(items, similarity_threshold):
    """Return *items* with near-duplicates removed.

    An item is dropped when its similarity to any already-kept item exceeds
    ``similarity_threshold``.  O(n^2) pairwise comparison — fine for the small
    samples used here.  (Parameter renamed from ``list``, which shadowed the
    builtin.)
    """
    filtered_data = []
    for item in items:
        # keep the item only if it is not a near-duplicate of one already kept
        if not any(calculate_similarity(item, kept) > similarity_threshold
                   for kept in filtered_data):
            filtered_data.append(item)
    return filtered_data


def process_data(input_data, columnname='text', num_data=100):
    """Sample up to ``num_data`` rows from ``input_data[columnname]`` and de-duplicate.

    Uses a fixed seed so the sample is reproducible across reruns.
    Returns the de-duplicated list of texts.
    """
    random.seed(20979738)  # fixed seed for reproducible sampling
    processed_data = list(input_data[columnname])
    # random.sample raises ValueError if the sample size exceeds the population
    num_data = min(num_data, len(processed_data))
    random_selection = random.sample(processed_data, num_data)
    filtered_data = filter_similar_items(random_selection, similarity_threshold=0.5)
    st.write('Number of data input: ', len(random_selection))
    st.write('After removing duplicates: ', len(filtered_data))
    return filtered_data


def chi2eng(filtered_data):
    """Translate Chinese reviews to English; pass English reviews through.

    Language is detected from the first item only (the sample is assumed to be
    monolingual).  Returns the (possibly translated) list of texts; empty when
    the input is empty or the language is unsupported.
    """
    translated_data = []
    if not filtered_data:
        # nothing to translate — avoid IndexError on filtered_data[0]
        return translated_data
    language_classification = langid.classify(filtered_data[0])[0]
    if language_classification == "zh":
        st.write("Your input is Chinese, translating to English")
        st.write('▶️ Translation model start downloading, loading model may take time, please wait...')
        trans_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-zh-en")
        st.write('⏺️ Translation model successfully loaded')
        for text in filtered_data:
            # call the (expensive) model once per item, not twice
            translated = trans_pipe(text)[0]['translation_text']
            st.write(translated)
            translated_data.append(translated)
    elif language_classification == 'en':
        st.write("Your input is English, moving to next stage...")
        translated_data = list(filtered_data)
    else:
        st.write('The language you input is: ', language_classification,
                 'the program cannot process')
    return translated_data


# Text Classification: Negative/Neutral/Positive
def emotion_classification(translated_data):
    """Classify each review's sentiment, plot the distribution as a pie chart,
    and return the top (up to 10, highest-score first) negative reviews
    concatenated into one period-separated string for summarization.
    """
    st.write('▶️ Classification model start downloading, loading model may take time, please wait...')
    emo_pipe = pipeline(
        "text-classification",
        model="deeplearningwithpython5240/twitter_roberta_base_sentiment_fintune_with_app_reviews")
    st.write('⏺️ Classification model successfully loaded')
    negative_count, neutral_count, positive_count = 0, 0, 0
    negative_dict = {}
    for text in translated_data:
        # single model call per review (the score is reused for negatives)
        result = emo_pipe(text)[0]
        label = result['label']
        st.write('Text: ', text)
        st.write('Label: ', label)
        st.write(' ')
        if label == 'negative':
            negative_dict[text] = result['score']
            negative_count += 1
        elif label == 'neutral':
            neutral_count += 1
        elif label == 'positive':
            positive_count += 1

    sizes = [negative_count, neutral_count, positive_count]
    labels = ['negative_review', 'neutral_review', 'positive_review']
    # Pie chart of the sentiment distribution
    st.write('Number of Positive Reviews: ', positive_count)
    st.write('Number of Neutral Reviews: ', neutral_count)
    st.write('Number of Negative Reviews: ', negative_count)
    fig = plt.figure(figsize=(5, 5))  # chart size
    plt.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90)
    # plt.show() returns None, so st.pyplot(plt.show()) relied on a deprecated
    # global-figure fallback; pass the figure object explicitly instead
    st.pyplot(fig)

    # sort negatives by confidence score, highest first
    negative_sorted = sorted(negative_dict.items(), key=lambda x: x[1], reverse=True)
    top10_negative_str = ""
    if len(negative_sorted) < 10:
        st.write("Total Number of Negative Comments: ", len(negative_sorted))
        for comment, _score in negative_sorted:
            st.write(comment)
            top10_negative_str += f"{comment}."
    else:
        st.write("Top 10 Negative Comments")
        for comment, _score in negative_sorted[:10]:
            st.write(comment)
            top10_negative_str += f"{comment}."
    return top10_negative_str


# Summarization
def summarization(top10_negative_str):
    """Summarize the concatenated negative reviews with the fine-tuned T5 model.

    Returns the raw pipeline output (a list of generation dicts).
    """
    st.write('▶️ Summarization model start downloading, loading model may take time, please wait...')
    summarize_pipe = pipeline(
        "text2text-generation",
        model="deeplearningwithpython5240/summarisation-t5-finetuned-model",
        max_new_tokens=512)
    st.write('⏺️ Summarization model successfully loaded')
    return summarize_pipe(top10_negative_str)


def main():
    """Streamlit entry point: upload CSV -> sample/dedupe -> translate ->
    sentiment classification -> summarize the worst negative reviews."""
    st.set_option('deprecation.showPyplotGlobalUse', False)
    st.set_page_config(
        page_title="Review Sentiment Analysis and Improvement Summarisation Report for Business Product",
        page_icon="🦜")
    st.header("Review Sentiment Analysis and Improvement Summarisation Report for Business Product")
    try:
        uploaded_file = st.file_uploader("🔶 Upload CSV file for analysis 🔶", type={"csv"})
        if uploaded_file is not None:
            columnname = st.text_input("🔶 Please enter the column name in CSV file you want to analyze 🔶")
            num_data = st.number_input("🔶 Please enter the number of rows you want to process 🔶", step=1)
            input_data = pd.read_csv(uploaded_file)
            st.dataframe(input_data)
            st.text('️️ ')
            # stage 1: process data
            st.text('🔶 Processing Data 🔶')
            processed_data = process_data(input_data, columnname, int(num_data))
            st.write(processed_data)
            st.text('️️🟢 Processing Data Finished 🟢')
            st.text('️️ ')
            # stage 2: translate
            st.text('🔶 Checking Translation is Needed or Not 🔶')
            translated_data = chi2eng(processed_data)
            st.write(translated_data)
            st.text('️️🟢 Translation Finished 🟢')
            st.text('️️ ')
            # stage 3: emotion classification
            st.text('️️🔶 Processing Emotion Classification 🔶')
            top10_negative_str = emotion_classification(translated_data)
            st.text('️️🟢 Emotion Classification Finished 🟢')
            st.text('️️ ')
            # stage 4: summarization
            st.text('🔶 Processing Summarization 🔶')
            summarized_text = summarization(top10_negative_str)
            st.write(summarized_text)
            st.text('️️🟢 Summarization Finished 🟢')
    except Exception as e:
        # surface the failure instead of silently swallowing every error
        # (was: bare `except:` followed by st.write(""))
        st.write(f"Error: {e}")


if __name__ == "__main__":
    main()