RohitBh committed on
Commit
9e01c0a
1 Parent(s): a94035e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +428 -78
app.py CHANGED
@@ -4,110 +4,460 @@ from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
4
  from textblob import TextBlob
5
  from transformers import pipeline
6
  import matplotlib.pyplot as plt
 
7
  import os
8
  from wordcloud import WordCloud
9
- import gradio as gr
10
 
 
 
 
 
11
 
12
# Function to analyze sentiment using the default Hugging Face pipeline
def analyze_sentiment_hf(text):
    """Classify ``text`` with a Hugging Face sentiment-analysis pipeline.

    Args:
        text: The string to classify. Inputs longer than 512 characters are
            cut to their first 511 characters before classification.

    Returns:
        "Positive", "Negative", or "Neutral" (for any other label).
    """
    # Build the pipeline once and reuse it; constructing it on every call
    # repeats the model load for each text that is analysed.
    classifier = getattr(analyze_sentiment_hf, "_pipeline", None)
    if classifier is None:
        classifier = pipeline("sentiment-analysis")
        analyze_sentiment_hf._pipeline = classifier

    # NOTE(review): this caps characters, not tokens.
    if len(text) > 512:
        text = text[:511]

    label = classifier(text)[0]["label"]
    # Accept both generic "LABEL_n" names and the "POSITIVE"/"NEGATIVE" names
    # that sentiment checkpoints commonly report, so positive/negative
    # predictions are not silently mapped to "Neutral".
    label_names = {
        "LABEL_1": "Positive",
        "POSITIVE": "Positive",
        "LABEL_0": "Negative",
        "NEGATIVE": "Negative",
    }
    return label_names.get(str(label).upper(), "Neutral")
25
 
26
# Function to analyze sentiment using VADER
def analyze_sentiment_vader(text):
    """Classify ``text`` by the sign of its VADER compound score.

    Args:
        text: The string to score.

    Returns:
        "Positive" for a compound score above 0, "Neutral" for exactly 0,
        otherwise "Negative".
    """
    # Reuse one analyzer across calls instead of constructing a new one for
    # every text (this function is applied row by row to uploaded CSVs).
    analyzer = getattr(analyze_sentiment_vader, "_analyzer", None)
    if analyzer is None:
        analyzer = SentimentIntensityAnalyzer()
        analyze_sentiment_vader._analyzer = analyzer

    compound = analyzer.polarity_scores(text)["compound"]
    if compound > 0:
        return "Positive"
    if compound == 0:
        return "Neutral"
    return "Negative"
36
 
37
# Function to analyze sentiment using TextBlob
def analyze_sentiment_textblob(text):
    """Classify ``text`` by the sign of its TextBlob polarity.

    Returns "Neutral" when the polarity is exactly 0, "Positive" when it is
    above 0, and "Negative" otherwise.
    """
    polarity = TextBlob(text).sentiment.polarity
    if polarity == 0:
        return "Neutral"
    return "Positive" if polarity > 0 else "Negative"
47
 
48
# Function to display DataFrame with sentiment
def display_results_dataframe(data_frame):
    """Render ``data_frame`` on the Streamlit page via ``st.write``."""
    st.write(data_frame)
51
 
52
# Function to display a pie chart of sentiment distribution
def create_pie_chart(data_frame, sentiment_column):
    """Draw a pie chart of the value counts of ``data_frame[sentiment_column]``.

    Args:
        data_frame: DataFrame holding the sentiment labels.
        sentiment_column: Name of the column whose values are counted.
    """
    sentiment_distribution = data_frame[sentiment_column].value_counts()
    fig, ax = plt.subplots()
    ax.pie(
        sentiment_distribution,
        labels=sentiment_distribution.index,
        autopct='%1.1f%%',
        startangle=90,
    )
    ax.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; release this one so
    # figures do not accumulate each time the chart is redrawn.
    plt.close(fig)
59
 
60
# Function to display word cloud based on sentiment data
def create_word_cloud(sentiment_data):
    """Render a word cloud generated from the text in ``sentiment_data``.

    Args:
        sentiment_data: A single string containing all text to visualise.
    """
    wordcloud_generator = WordCloud(width=800, height=400).generate(sentiment_data)
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(wordcloud_generator, interpolation='bilinear')
    ax.axis('off')
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; release this one so
    # figures do not accumulate each time the cloud is redrawn.
    plt.close(fig)
67
 
68
# Main UI setup
st.set_page_config(page_title="Sentiment Analysis Tool", page_icon=":bar_chart:")
st.title("Sentiment Analysis Tool")

# Sidebar configuration for user input options
st.sidebar.title("Analysis Options")
input_type = st.sidebar.selectbox("Choose Input Type", ["Text Input", "CSV Upload"])
model_choice = st.sidebar.selectbox(
    "Choose Sentiment Analysis Model", ["Hugging Face", "VADER", "TextBlob"]
)
display_type = st.sidebar.selectbox(
    "Choose Display Type", ["DataFrame", "Pie Chart", "Word Cloud"]
)

# Resolve the analyzer for the selected model; anything that is not
# "Hugging Face" or "VADER" falls back to TextBlob.
analyze_text = {
    "Hugging Face": analyze_sentiment_hf,
    "VADER": analyze_sentiment_vader,
}.get(model_choice, analyze_sentiment_textblob)

# Process input based on user choice
if input_type == "Text Input":
    user_text = st.text_input("Enter text for sentiment analysis:")
    if st.button("Analyze Sentiment"):
        if not user_text:
            st.warning("Please enter some text to analyze.")
        else:
            st.write("Detected Sentiment:", analyze_text(user_text))
elif input_type == "CSV Upload":
    uploaded_file = st.file_uploader("Upload CSV file for analysis", type="csv")
    if st.button("Start Analysis"):
        if uploaded_file is None:
            st.warning("Please upload a CSV file to proceed with analysis.")
        else:
            data_frame = pd.read_csv(uploaded_file)
            # The CSV is expected to carry the text to analyse in a 'text' column.
            if 'text' not in data_frame.columns:
                st.error("The uploaded CSV file must contain a 'text' column.")
            else:
                data_frame['Sentiment'] = data_frame['text'].apply(analyze_text)

                if display_type == "DataFrame":
                    display_results_dataframe(data_frame)
                elif display_type == "Pie Chart":
                    create_pie_chart(data_frame, 'Sentiment')
                elif display_type == "Word Cloud":
                    create_word_cloud(' '.join(data_frame['text']))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  from textblob import TextBlob
5
  from transformers import pipeline
6
  import matplotlib.pyplot as plt
7
+ import base64
8
  import os
9
  from wordcloud import WordCloud
 
10
 
11
# Hugging Face sentiment pipeline shared by every call to analyze_hf_sentiment.
# NOTE(review): Streamlit re-runs this script on each interaction, so this load
# is repeated per rerun — consider a Streamlit resource cache if the installed
# version provides one.
hf_sentiment_analyzer = pipeline(
    "sentiment-analysis", "Dmyadav2001/Sentimental-Analysis"
)


def analyze_hf_sentiment(text):
    """Classify ``text`` with the shared Hugging Face sentiment pipeline.

    Args:
        text: The string to classify. Inputs longer than 512 characters are
            cut to their first 511 characters before classification.

    Returns:
        "Positive" for LABEL_1, "Negative" for LABEL_0, and "Neutral" for
        LABEL_2 or any other label. The fallback guarantees a string result;
        previously an unrecognised label made the function return ``None``.
    """
    # NOTE(review): this caps characters, not tokens.
    if len(text) > 512:
        text = text[:511]

    label = hf_sentiment_analyzer(text)[0]["label"]
    label_names = {
        "LABEL_1": "Positive",
        "LABEL_0": "Negative",
        "LABEL_2": "Neutral",
    }
    return label_names.get(label, "Neutral")
28
 
29
# Function to perform sentiment analysis using VADER
def analyze_vader_sentiment(text):
    """Classify ``text`` by the sign of its VADER compound score.

    Args:
        text: The string to score.

    Returns:
        "Positive" for a compound score above 0, "Neutral" for exactly 0,
        otherwise "Negative".
    """
    # Reuse one analyzer across calls: this function runs once per CSV row,
    # and constructing a fresh analyzer each time is wasted work.
    analyzer = getattr(analyze_vader_sentiment, "_analyzer", None)
    if analyzer is None:
        analyzer = SentimentIntensityAnalyzer()
        analyze_vader_sentiment._analyzer = analyzer

    vader_score = analyzer.polarity_scores(text)["compound"]
    if vader_score > 0:
        return "Positive"
    if vader_score == 0:
        return "Neutral"
    return "Negative"
39
 
40
# Function to perform sentiment analysis using TextBlob
def analyze_textblob_sentiment(text):
    """Classify ``text`` by the sign of its TextBlob polarity.

    Returns "Neutral" when the polarity is exactly 0, "Positive" when it is
    above 0, and "Negative" otherwise.
    """
    polarity = TextBlob(text).sentiment.polarity
    if polarity == 0:
        return "Neutral"
    return "Positive" if polarity > 0 else "Negative"
50
 
51
# Function to display DataFrame with updated sentiment column
def display_dataframe(df):
    """Render ``df`` on the Streamlit page via ``st.write``."""
    st.write(df)
54
 
55
# Function to display pie chart for sentiment distribution
def display_pie_chart(df, column):
    """Draw a pie chart of ``df[column]`` value counts and offer it as a PNG.

    Args:
        df: DataFrame holding the sentiment labels.
        column: Name of the column whose values are counted.

    Side effects:
        Renders the chart on the page. When the 'Download Pie Chart' button is
        pressed, writes ``pie_chart.png`` to the working directory and shows a
        download button for it.
    """
    sentiment_counts = df[column].value_counts()
    fig, ax = plt.subplots()
    ax.pie(
        sentiment_counts,
        labels=sentiment_counts.index,
        autopct="%1.1f%%",
        startangle=140,
    )
    ax.axis("equal")
    st.pyplot(fig)

    # Add a download button
    if st.button('Download Pie Chart'):
        # Save this specific figure (not whatever pyplot considers "current").
        fig.savefig('pie_chart.png')

        # Read the image back with a context manager so the handle is closed.
        with open('pie_chart.png', 'rb') as image_file:
            image_bytes = image_file.read()

        # Offer the image file for download
        st.download_button(
            label='Download Pie Chart Image',
            data=image_bytes,
            file_name='pie_chart.png',
            mime='image/png',
        )

    # pyplot keeps figures alive until closed; release it so figures do not
    # accumulate each time the chart is redrawn.
    plt.close(fig)
75
+
76
# Function to display word cloud
def display_wordcloud(text_data):
    """Render a word cloud built from ``text_data`` and offer it as a PNG.

    Args:
        text_data: A single string containing all text to visualise.

    Side effects:
        Renders the cloud on the page. When the 'Download Word Cloud' button is
        pressed, writes ``word_cloud.png`` to the working directory and shows a
        download button for it.
    """
    wordcloud = WordCloud(width=800, height=400, background_color="white").generate(
        text_data
    )
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(wordcloud, interpolation="bilinear")
    ax.axis("off")
    st.pyplot(fig)

    # Add a download button
    if st.button('Download Word Cloud'):
        # Save this specific figure (not whatever pyplot considers "current").
        fig.savefig('word_cloud.png')

        # Read the image back with a context manager so the handle is closed.
        with open('word_cloud.png', 'rb') as image_file:
            image_bytes = image_file.read()

        # Offer the image file for download
        st.download_button(
            label='Download Word Cloud Image',
            data=image_bytes,
            file_name='word_cloud.png',
            mime='image/png',
        )

    # pyplot keeps figures alive until closed; release it so figures do not
    # accumulate each time the cloud is redrawn.
    plt.close(fig)
93
+
94
# Function to download CSV file
def download_csv(df):
    """Show a "Download CSV File" link that embeds ``df`` as a base64 data URI.

    The frame is serialised without its index and the link is rendered through
    ``st.markdown`` with raw HTML enabled.
    """
    csv_text = df.to_csv(index=False)
    encoded = base64.b64encode(csv_text.encode())  # B64 encoding
    b64 = encoded.decode()
    link_html = f'<a href="data:file/csv;base64,{b64}" download="sentiment_analysis_results.csv">Download CSV File</a>'
    st.markdown(link_html, unsafe_allow_html=True)
100
+
101
# Function to count occurrences of keywords and sentiment distribution
def count_reviews_with_keywords(df, keywords):
    """Count, per keyword, the reviews mentioning it and their sentiment split.

    Matching is a case-insensitive substring test of each keyword against the
    review text.

    Args:
        df: DataFrame with a "review_text" column and a "Sentiment" column.
        keywords: Iterable of keyword strings. Repeated keywords are counted
            once.

    Returns:
        A dict mapping each keyword to
        ``{"Positive": int, "Negative": int, "Total": int}``. "Total" counts
        every matching review, whatever its sentiment.
    """
    keyword_counts = {
        keyword: {"Positive": 0, "Negative": 0, "Total": 0} for keyword in keywords
    }
    # Lower-case each distinct keyword once. Iterating the dict (not the raw
    # list) keeps a repeated keyword from being counted twice per review.
    needles = {keyword: keyword.lower() for keyword in keyword_counts}

    for review_text, sentiment in zip(df["review_text"], df["Sentiment"]):
        # Missing cells arrive as NaN/None rather than str; they match nothing.
        if not isinstance(review_text, str):
            continue
        haystack = review_text.lower()

        for keyword, needle in needles.items():
            if needle in haystack:
                counts = keyword_counts[keyword]
                counts["Total"] += 1
                if sentiment in ("Positive", "Negative"):
                    counts[sentiment] += 1

    return keyword_counts
119
+
120
+
121
# Streamlit UI: page chrome
st.set_page_config(page_title="SentimentAnalysis App", page_icon=":smiley:")
st.title("SentimentAnalysis App")

# Sidebar: choices that drive the rest of the script
st.sidebar.title("Options")
input_option = st.sidebar.radio(
    "Select Input Option",
    ("Free Text", "CSV Files"),
)
selected_model = st.sidebar.radio(
    "Select Sentiment Analysis Model",
    ("VADER", "TextBlob", "Hugging Face"),
)
result_option = st.sidebar.radio(
    "Select Result Display Option",
    (
        "DataFrame",
        "Pie Chart",
        "Bar Chart",
        "Keyword Frequency",
        "WordCloud",
        "Comparative Sentiment Analysis",
    ),
)

# Main content: placeholder text plus a bar used to report row progress
progress_label = st.empty()
progress_bar = st.progress(0)
progress = 0

# Processed CSVs are written to (and later listed from) this directory;
# create it up front if it does not exist yet.
processed_directory = "processed_files"
os.makedirs(processed_directory, exist_ok=True)

# Names of processed files
processed_files = []
149
+
150
# Function to get filenames from the processed directory
def get_processed_filenames():
    """Return the names of the regular files found in ``processed_directory``.

    Sub-directories and other non-file entries are skipped; names are returned
    in the order ``os.listdir`` yields them.
    """
    filenames = []
    for entry in os.listdir(processed_directory):
        if os.path.isfile(os.path.join(processed_directory, entry)):
            filenames.append(entry)
    return filenames
157
+
158
if input_option == "Free Text":
    st.subheader("Enter review for sentiment analysis:")
    user_input = st.text_area("", "")
    if user_input:
        # Pick the analyzer matching the sidebar selection and run it while a
        # spinner is shown.
        free_text_analyzers = {
            "Hugging Face": analyze_hf_sentiment,
            "VADER": analyze_vader_sentiment,
            "TextBlob": analyze_textblob_sentiment,
        }
        with st.spinner("Analyzing..."):
            result = free_text_analyzers[selected_model](user_input)
        st.write("Sentiment:", result)
    else:
        st.info("Enter some text above for sentiment analysis.")
172
+
173
if input_option == "CSV Files":
    import re  # used only by the keyword-frequency view below

    st.subheader("Select CSV files for sentiment analysis:")

    # Uploading new file
    files = st.file_uploader(
        "Upload New File", type=["csv"], accept_multiple_files=True
    )
    if files:
        # Process uploaded new files
        for uploaded_file in files:
            # Check the extension rather than the browser-reported MIME type:
            # some browsers label CSV uploads as something other than
            # "text/csv", which made valid files get rejected.
            if not uploaded_file.name.lower().endswith(".csv"):
                st.warning(
                    "Uploaded file is not a CSV file. Please upload a CSV file only."
                )
                continue

            df = pd.read_csv(uploaded_file)
            if "review_text" not in df.columns:
                st.warning(
                    "Uploaded CSV file doesn't contain 'review_text' column. Please check the CSV file format."
                )
                continue

            total_rows = len(df)
            # Restart the counter for every file so the label and bar reflect
            # this file's rows instead of a running total across uploads.
            progress = 0

            # Empty cells are read as NaN; analyse them as empty strings so a
            # single blank review does not abort the whole file.
            reviews = df["review_text"].fillna("").astype(str)

            sentiments_v = []
            sentiments_tb = []
            sentiments_hf = []

            for review_text in reviews:
                sentiments_v.append(analyze_vader_sentiment(review_text))
                sentiments_tb.append(analyze_textblob_sentiment(review_text))
                sentiments_hf.append(analyze_hf_sentiment(review_text))
                progress += 1
                progress_label.text(f"{progress}/{total_rows}")
                progress_bar.progress(min(progress / total_rows, 1.0))

            df["VADER Sentiment"] = sentiments_v
            df["TextBlob Sentiment"] = sentiments_tb
            df["HuggingFace Sentiment"] = sentiments_hf

            # Save processed file with modified filename
            new_filename = os.path.splitext(uploaded_file.name)[0] + "1.csv"
            df.to_csv(os.path.join(processed_directory, new_filename), index=False)
            st.success(f"New file processed and saved as {new_filename}")

    # List of already processed files
    processed_files = get_processed_filenames()
    selected_files = st.multiselect("Select from Processed Files", processed_files)

    if not files and not selected_files:
        st.info(
            "Upload a new CSV file or select from processed files above for sentiment analysis."
        )

    # Load every processed file the user selected
    all_dfs = [
        pd.read_csv(os.path.join(processed_directory, file_name))
        for file_name in selected_files
    ]

    # Results
    if all_dfs:
        combined_df = pd.concat(all_dfs, ignore_index=True)

        # Keep only the selected model's column and expose it as "Sentiment".
        model_columns = {
            "TextBlob": "TextBlob Sentiment",
            "VADER": "VADER Sentiment",
            "Hugging Face": "HuggingFace Sentiment",
        }
        chosen_column = model_columns[selected_model]
        other_columns = [
            name for name in model_columns.values() if name != chosen_column
        ]
        # errors="ignore": a selected file may lack some of the model columns.
        combined_df = combined_df.drop(columns=other_columns, errors="ignore")
        combined_df = combined_df.rename(columns={chosen_column: "Sentiment"})

        if result_option == "DataFrame":
            st.subheader("Sentiment Analysis Results")
            display_dataframe(combined_df)
            download_csv(combined_df)
        elif result_option == "Pie Chart":
            st.subheader("Sentiment Distribution")
            display_pie_chart(combined_df, "Sentiment")
        elif result_option == "Bar Chart":
            # Calculate value counts
            sentiment_counts = combined_df["Sentiment"].value_counts()
            # Display bar chart
            st.bar_chart(sentiment_counts)

            # Add a download button
            if st.button('Download Sentiment Counts Chart'):
                # Plot the bar chart
                fig, ax = plt.subplots()
                sentiment_counts.plot(kind='bar', ax=ax)
                ax.set_xlabel('Sentiment')
                ax.set_ylabel('Count')
                ax.set_title('Sentiment Counts')
                plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
                fig.tight_layout()

                # Save the bar chart as an image file, then release the figure
                fig.savefig('sentiment_counts_chart.png')
                plt.close(fig)

                # Offer the image file for download
                with open('sentiment_counts_chart.png', 'rb') as image_file:
                    image_bytes = image_file.read()
                st.download_button(
                    label='Download Sentiment Counts Chart Image',
                    data=image_bytes,
                    file_name='sentiment_counts_chart.png',
                    mime='image/png',
                )

        elif result_option == "Keyword Frequency":
            st.subheader("Keyword Frequency")

            # List of keywords
            keywords = [
                "delivery",
                "shipping",
                "parcel",
                "package",
                "tracking",
                "shipment",
                "cargo",
                "freight",
                "automation",
                "automated",
                "robotic",
                "robots",
                "AI",
                "artificial intelligence",
                "machine learning",
                "chatbot",
                "virtual assistant",
                "customer support",
                "real-time",
                "instant",
                "live update",
                "status",
                "IoT",
                "internet of things",
                "connected devices",
                "smart technology",
                "blockchain",
                "ledger",
                "transparency",
                "security",
                "sustainability",
                "eco-friendly",
                "green logistics",
                "carbon footprint",
                "customer service",
                "support",
                "experience",
                "satisfaction",
                "data analytics",
                "big data",
                "analysis",
                "insights",
                "cloud computing",
                "cloud-based",
                "digital infrastructure",
                "storage",
                "5G",
                "connectivity",
                "network speed",
                "wireless",
                "drone",
                "aerial delivery",
                "UAV",
                "drone shipping",
                "augmented reality",
                "AR",
                "virtual reality",
                "VR",
                "3D printing",
                "additive manufacturing",
                "custom parts",
                "prototyping",
                "inventory management",
                "stock levels",
                "warehouse management",
                "storage solutions",
                "supply chain",
                "logistics",
                "supply network",
                "distribution",
                "eco-packaging",
                "sustainable materials",
                "recycling",
                "waste reduction",
                "digital platform",
                "e-commerce",
                "online shopping",
                "online order",
                "cybersecurity",
                "data protection",
                "privacy",
                "encryption",
                "predictive modeling",
                "forecasting",
                "demand planning",
                "trend analysis",
                "robotics",
                "automated vehicles",
                "self-driving cars",
                "logistics automation",
                "visibility",
                "supply chain visibility",
                "track and trace",
                "monitoring",
                "integration",
                "ERP",
                "supply chain integration",
                "software",
                "optimization",
                "efficiency",
                "process improvement",
                "lean logistics",
                "personalization",
                "customization",
                "tailored services",
                "personal touch",
                "ethical sourcing",
                "fair trade",
                "labor rights",
                "ethical business",
                "user experience",
                "UX",
                "customer journey",
                "service design",
                "visibility",
            ]

            # Count whole-word / whole-phrase, case-insensitive occurrences of
            # each keyword in the combined review text. Counting
            # whitespace-split tokens (the earlier approach) could never match
            # multi-word keywords or words followed by punctuation.
            corpus = " ".join(combined_df["review_text"].dropna().astype(str)).lower()
            frequency_rows = []
            for keyword in dict.fromkeys(keywords):  # drop repeated keywords
                pattern = r"(?<!\w)" + re.escape(keyword.lower()) + r"(?!\w)"
                occurrences = len(re.findall(pattern, corpus))
                if occurrences:
                    frequency_rows.append(
                        {"Keyword": keyword, "Frequency": occurrences}
                    )

            filtered_keyword_frequency = pd.DataFrame(
                frequency_rows, columns=["Keyword", "Frequency"]
            ).sort_values("Frequency", ascending=False, kind="stable")

            if filtered_keyword_frequency.empty:
                # Nothing to chart; plotting an empty frame would raise.
                st.info("None of the tracked keywords appear in the selected reviews.")
            else:
                # Display bar chart for filtered keyword frequency
                st.bar_chart(filtered_keyword_frequency.set_index("Keyword"))

                # Add a download button
                if st.button('Download Keyword Frequency Chart'):
                    # Plot the bar chart
                    fig, ax = plt.subplots()
                    filtered_keyword_frequency.plot(
                        kind='bar', x='Keyword', y='Frequency', ax=ax
                    )
                    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
                    fig.tight_layout()

                    # Save the bar chart as an image file, then release the figure
                    fig.savefig('keyword_frequency_chart.png')
                    plt.close(fig)

                    # Offer the image file for download
                    with open('keyword_frequency_chart.png', 'rb') as image_file:
                        image_bytes = image_file.read()
                    st.download_button(
                        label='Download Keyword Frequency Chart Image',
                        data=image_bytes,
                        file_name='keyword_frequency_chart.png',
                        mime='image/png',
                    )
        elif result_option == "WordCloud":
            # Must match the sidebar option text exactly ("WordCloud"); the
            # earlier "Word Cloud" comparison never matched, so this view was
            # unreachable and the comparative analysis was shown instead.
            st.subheader("Word Cloud")
            text_data = " ".join(combined_df["review_text"].dropna().astype(str))
            display_wordcloud(text_data)
        else:
            st.subheader("Comparative Sentiment Analysis")
            supply_chain_areas = {
                'logistics': ['logistics', 'supply chain', 'cargo', 'shipment', 'freight', 'package', 'tracking'],
                'delivery': ['delivery', 'shipping', 'courier', 'postal', 'parcel'],
                # 'security' previously carried a stray closing quote character
                # inside the string, so it could never match any review.
                'inventory': ['inventory', 'stock', 'storage', 'warehouse', 'security'],
                'customer service': ['customer service', 'support', 'helpdesk', 'service center', 'experience', 'refund'],
                'procurement': ['procurement', 'sourcing', 'purchasing', 'buying', 'order'],
                'distribution': ['distribution', 'supply network', 'distribution center'],
                'manufacturing': ['manufacturing', 'production', 'assembly', 'quality', 'defect'],
            }

            supply_chain_area = st.sidebar.radio(
                "Select Supply Chain Area",
                tuple(supply_chain_areas),
            )

            # Hand the counter string-only review text so missing cells do not
            # break its per-row lower-casing.
            reviews_df = combined_df.assign(
                review_text=combined_df["review_text"].fillna("").astype(str)
            )
            # Call the function to count occurrences of keywords and sentiment distribution
            keyword_counts = count_reviews_with_keywords(
                reviews_df, supply_chain_areas[supply_chain_area]
            )

            # Convert keyword_counts to DataFrame (one row per keyword)
            df_counts = pd.DataFrame(keyword_counts).transpose()

            # Plot Positive vs. Negative counts per keyword
            st.bar_chart(df_counts[["Positive", "Negative"]], use_container_width=True, height=500)