Update app.py
Browse files
app.py
CHANGED
@@ -27,21 +27,23 @@ def process_data(input_data,columnname = 'text', num_data = 100):
|
|
27 |
processed_data = [i for i in input_data[columnname]]
|
28 |
random_selection = random.sample(processed_data, num_data)
|
29 |
filtered_data = filter_similar_items(random_selection, similarity_threshold = 0.5)
|
30 |
-
st.write('
|
31 |
-
st.write('After
|
32 |
return filtered_data
|
33 |
|
34 |
def chi2eng(filtered_data):
|
35 |
-
trans_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-zh-en")
|
36 |
translated_data = []
|
37 |
language_Classification = langid.classify(filtered_data[0])[0]
|
38 |
if language_Classification == "zh":
|
39 |
-
st.write("Your input is Chinese,
|
|
|
|
|
|
|
40 |
for i in filtered_data:
|
41 |
st.write(trans_pipe(i)[0]['translation_text'])
|
42 |
translated_data.append(trans_pipe(i)[0]['translation_text'])
|
43 |
elif language_Classification == 'en':
|
44 |
-
st.write("Your input is English,
|
45 |
translated_data = [i for i in filtered_data]
|
46 |
else:
|
47 |
st.write('The anguage you input is: ',langid.classify(filtered_data[0])[0],'the program cannot process')
|
@@ -49,7 +51,9 @@ def chi2eng(filtered_data):
|
|
49 |
|
50 |
# Text Classification:Negative/Neutral/Positive
|
51 |
def emotion_classification(translated_data):
|
|
|
52 |
emo_pipe = pipeline("text-classification", model="deeplearningwithpython5240/twitter_roberta_base_sentiment_fintune_with_app_reviews")
|
|
|
53 |
negative_count, neutral_count, positive_count = 0,0,0
|
54 |
negative_dict = {}
|
55 |
for i in translated_data:
|
@@ -91,7 +95,9 @@ def emotion_classification(translated_data):
|
|
91 |
|
92 |
# Summarization
|
93 |
def summarization(top10_negative_str):
    """Run the summarization model over the given text.

    A text2text-generation pipeline is constructed on each call and applied
    to ``top10_negative_str`` as-is.

    Args:
        top10_negative_str: Input passed unchanged to the pipeline.

    Returns:
        Whatever the pipeline call returns for ``top10_negative_str``.
    """
    summarizer = pipeline(
        "text2text-generation",
        model="deeplearningwithpython5240/summarisation-t5-finetuned-model",
        max_new_tokens=512,
    )
    return summarizer(top10_negative_str)
|
97 |
|
@@ -122,9 +128,9 @@ def main():
|
|
122 |
st.text('️️ ')
|
123 |
|
124 |
#stage 3:emotion Classification
|
125 |
-
st.text('️️🔶 Processing Emotion
|
126 |
top10_negative_str = emotion_classification(translated_data)
|
127 |
-
st.text('️️🟢 Emotion
|
128 |
st.text('️️ ')
|
129 |
|
130 |
#stage 4:Summarization
|
@@ -133,7 +139,7 @@ def main():
|
|
133 |
st.write(summarized_text)
|
134 |
st.text('️️🟢 Summarization Finished 🟢')
|
135 |
except:
|
136 |
-
st.write("
|
137 |
|
138 |
if __name__ == "__main__":
|
139 |
main()
|
|
|
27 |
processed_data = [i for i in input_data[columnname]]
|
28 |
random_selection = random.sample(processed_data, num_data)
|
29 |
filtered_data = filter_similar_items(random_selection, similarity_threshold = 0.5)
|
30 |
+
st.write('Number of data input: ',len(random_selection))
|
31 |
+
st.write('After removing duplicates: ',len(filtered_data))
|
32 |
return filtered_data
|
33 |
|
34 |
def chi2eng(filtered_data):
|
|
|
35 |
translated_data = []
|
36 |
language_Classification = langid.classify(filtered_data[0])[0]
|
37 |
if language_Classification == "zh":
|
38 |
+
st.write("Your input is Chinese, translating to English")
|
39 |
+
st.write('▶️ Translation model start downing, loading model may takes time, please wait...')
|
40 |
+
trans_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-zh-en")
|
41 |
+
st.write('⏺️ Translation model successfully loaded')
|
42 |
for i in filtered_data:
|
43 |
st.write(trans_pipe(i)[0]['translation_text'])
|
44 |
translated_data.append(trans_pipe(i)[0]['translation_text'])
|
45 |
elif language_Classification == 'en':
|
46 |
+
st.write("Your input is English, moving to next stage...")
|
47 |
translated_data = [i for i in filtered_data]
|
48 |
else:
|
49 |
st.write('The anguage you input is: ',langid.classify(filtered_data[0])[0],'the program cannot process')
|
|
|
51 |
|
52 |
# Text Classification:Negative/Neutral/Positive
|
53 |
def emotion_classification(translated_data):
|
54 |
+
st.write('▶️ Classification model start downing, loading model may takes time, please wait...')
|
55 |
emo_pipe = pipeline("text-classification", model="deeplearningwithpython5240/twitter_roberta_base_sentiment_fintune_with_app_reviews")
|
56 |
+
st.write('⏺️ Classification model successfully loaded')
|
57 |
negative_count, neutral_count, positive_count = 0,0,0
|
58 |
negative_dict = {}
|
59 |
for i in translated_data:
|
|
|
95 |
|
96 |
# Summarization
|
97 |
def summarization(top10_negative_str):
    """Summarize the given text, reporting model-load progress in the page.

    A text2text-generation pipeline is constructed on each call; a status
    line is written through ``st.write`` before and after that construction
    so the user can see that the model load is in progress.

    Args:
        top10_negative_str: Input passed unchanged to the pipeline.

    Returns:
        Whatever the pipeline call returns for ``top10_negative_str``.
    """
    st.write('▶️ Summarization model start downloading, loading model may take time, please wait...')
    summarize_pipe = pipeline(
        "text2text-generation",
        model="deeplearningwithpython5240/summarisation-t5-finetuned-model",
        max_new_tokens=512,
    )
    st.write('⏺️ Summarization model successfully loaded')
    summarized_text = summarize_pipe(top10_negative_str)
    return summarized_text
|
103 |
|
|
|
128 |
st.text('️️ ')
|
129 |
|
130 |
#stage 3:emotion Classification
|
131 |
+
st.text('️️🔶 Processing Emotion Classification 🔶')
|
132 |
top10_negative_str = emotion_classification(translated_data)
|
133 |
+
st.text('️️🟢 Emotion Classification Finished 🟢')
|
134 |
st.text('️️ ')
|
135 |
|
136 |
#stage 4:Summarization
|
|
|
139 |
st.write(summarized_text)
|
140 |
st.text('️️🟢 Summarization Finished 🟢')
|
141 |
except:
|
142 |
+
st.write("")
|
143 |
|
144 |
if __name__ == "__main__":
|
145 |
main()
|