Adding the Data Viz

* Adding helper functions for making the data viz
* Making plotting functions
* Adding the necessary imports
* Adding the Gradio-related code for data viz
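In short, the commit adds a set of plotting helpers behind a single entry point, `call_data_viz_func(plot_type)`, and wires it to a new radio-button/plot row in the existing Gradio Blocks app. A minimal sketch of the intended wiring, with names taken from the diff below and assuming the sentiment step has already written `sentiment.csv` with the `sentiment` and `content` columns the helpers expect:

# Hypothetical usage sketch (full code in the diff below)
# call_data_viz_func reads sentiment.csv and returns a matplotlib object for gr.Plot to render
fig_pct = call_data_viz_func('percentage_plot')   # pie chart of sentiment percentages
fig_wc = call_data_viz_func('word_count_plot')    # word clouds per sentiment class

# On the Gradio side, a Radio picks the plot type and a Plot component renders the result
ui_plot_type = gr.Radio(choices=["percentage_plot", "word_count_plot"], value='percentage_plot')
plt_output = gr.Plot(label="Data Vizualizer for the News App")
data_viz_bt = gr.Button("Vizualize data")
data_viz_bt.click(call_data_viz_func, inputs=ui_plot_type, outputs=plt_output)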
app.py
CHANGED
@@ -4,7 +4,7 @@ import os
 import json
 import pandas as pd
 
-#
+# ----------------imports for Sentiment Analyzer----------------------
 import re
 
 from sklearn.pipeline import Pipeline
@@ -20,6 +20,19 @@ from nltk.stem import RSLPStemmer
 
 import joblib
 
+# --------------------------------imports for Data Vizualisation
+from wordcloud import WordCloud
+from collections import Counter
+
+import matplotlib.pyplot as plt
+import seaborn as sns
+# %matplotlib inline  # IPython notebook magic, not valid in a plain .py script, so kept commented out
+from matplotlib.gridspec import GridSpec
+import plotly.offline as py
+import plotly.express as px
+import plotly.graph_objs as go
+
+
 #--------------------------------------------------------------------------------------
 #------------------------ NEWS DATA RETRIEVER------------------------------------------
 #--------------------------------------------------------------------------------------
@@ -318,9 +331,153 @@ def sentiment_analyzer(csv_file_name='combined_news_response.csv'):
 
 
 
+#----------------------------------------------------------------------------------------------
+#----------------------------------DATA VIZUALIZER---------------------------------------------
+#----------------------------------------------------------------------------------------------
+
+
+def get_senti_pct_distribution(expt_df):
+    sentiment_counts = expt_df['sentiment'].value_counts()
+    labels = sentiment_counts.index
+    sizes = sentiment_counts.values
+    colors = ['lightblue', 'limegreen', 'lightcoral']
+
+    # Create a pie chart
+    plt.figure(figsize=(8, 8))
+    plt.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%', startangle=140)
+
+    # Equal aspect ratio ensures that pie is drawn as a circle
+    plt.axis('equal')
+    plt.title('Sentiment Distribution for Labelled Data')
+    # plt.show()
+
+    return plt
+
+def preprocessing_data(expt_df):
+    # Creating a list of news content strings
+    news = list(expt_df['content'].values)
+
+    # Applying RegEx
+    news_breakline = re_breakline(news)
+    expt_df['re_breakline'] = news_breakline
+
+    # Applying RegEx
+    news_hyperlinks = re_hyperlinks(news_breakline)
+    expt_df['re_hyperlinks'] = news_hyperlinks
+
+    # Applying RegEx
+    news_dates = re_dates(news_hyperlinks)
+    expt_df['re_dates'] = news_dates
+
+    # Applying RegEx
+    news_money = re_money(news_dates)
+    expt_df['re_money'] = news_money
+
+    # Applying RegEx
+    news_numbers = re_numbers(news_money)
+    expt_df['re_numbers'] = news_numbers
+
+    # Applying RegEx
+    news_negation = re_negation(news_numbers)
+    expt_df['re_negation'] = news_negation
+
+    # Applying RegEx
+    news_special_chars = re_special_chars(news_negation)
+    expt_df['re_special_chars'] = news_special_chars
+
+    # Applying RegEx
+    news_whitespaces = re_whitespaces(news_special_chars)
+    expt_df['re_whitespaces'] = news_whitespaces
+
+    # Removing stopwords and looking at some examples
+    news_stopwords = [' '.join(stopwords_removal(news)) for news in news_whitespaces]
+    expt_df['stopwords_removed'] = news_stopwords
+
+    return expt_df
+
+def generate_wc(processed_expt_df):
+    # Generating words
+    pos_news = list(processed_expt_df.query('sentiment == "positive"')['stopwords_removed'].values)
+    positive_words = ' '.join(pos_news).split(' ')
+    neg_news = list(processed_expt_df.query('sentiment == "negative"')['stopwords_removed'].values)
+    negative_words = ' '.join(neg_news).split(' ')
+    neu_news = list(processed_expt_df.query('sentiment == "neutral"')['stopwords_removed'].values)
+    neutral_words = ' '.join(neu_news).split(' ')
+
+    # Using Counter for creating word-frequency dictionaries
+    positive_dict = Counter(positive_words)
+    negative_dict = Counter(negative_words)
+    neutral_dict = Counter(neutral_words)
+
+    # Generating wordclouds for news
+    positive_wc = WordCloud(width=1280,
+                            height=720,
+                            collocations=False,
+                            random_state=42,
+                            # mask=transf_like_mask,
+                            colormap='Blues', background_color='white',
+                            max_words=50).generate_from_frequencies(positive_dict)
+
+    negative_wc = WordCloud(width=1280,
+                            height=720,
+                            collocations=False,
+                            random_state=42,
+                            # mask=transf_bomb_mask,
+                            colormap='Reds',
+                            background_color='white',
+                            max_words=50).generate_from_frequencies(negative_dict)
+
+    neutral_wc = WordCloud(width=1280,
+                           height=720,
+                           collocations=False,
+                           random_state=42,
+                           # mask=transf_bomb_mask,
+                           colormap='Greens',
+                           background_color='white',
+                           max_words=50).generate_from_frequencies(neutral_dict)
+
+    return positive_wc, negative_wc, neutral_wc
+
+
+def plot_news_wc(positive_wc, negative_wc, neutral_wc):
+    fig, axs = plt.subplots(1, 3, figsize=(20, 20))
+    ax1 = axs[0]
+    ax2 = axs[1]
+    ax3 = axs[2]
+
+    ax1.imshow(positive_wc)
+    ax1.axis('off')
+    ax1.set_title('WordCloud for Positive Words in News', size=18, pad=20)
+
+    ax2.imshow(negative_wc)
+    ax2.axis('off')
+    ax2.set_title('WordCloud for Negative Words in News', size=18, pad=20)
+
+    ax3.imshow(neutral_wc)
+    ax3.axis('off')
+    ax3.set_title('WordCloud for Neutral Words in News', size=18, pad=20)
+
+    return fig
+
+def get_news_wc(expt_df):
+    processed_expt_df = preprocessing_data(expt_df)
+    positive_wc, negative_wc, neutral_wc = generate_wc(processed_expt_df)
+    return plot_news_wc(positive_wc, negative_wc, neutral_wc)
+
+def call_data_viz_func(plot_type):
+    senti_csv_file_name = 'sentiment.csv'
+    expt_df = pd.read_csv(senti_csv_file_name)
+
+    if plot_type == 'percentage_plot':
+        return get_senti_pct_distribution(expt_df)
+    elif plot_type == 'word_count_plot':
+        return get_news_wc(expt_df)
+    else:
+        raise ValueError("Unknown plot type selected")
+
 
 
-
+#---------------------- GRADIO APP --------------------
 
 with gr.Blocks() as demo:
     gr.Markdown("# Welcome to News Retrieval and Sentiment Analyzer App a.k.a InfoMood Tracker")
@@ -329,8 +486,11 @@ with gr.Blocks() as demo:
     gr.Markdown("1. Select the Domain from which you want to retrieve the news")
     gr.Markdown("2. Click on the `Retrieve news` to retrieve the news from the domain. You Should see that the result displayed in the form of Table")
     gr.Markdown("3. Click on the `Analyze Sentiment` to analyze the sentiments of the news retrieved.")
-
+    gr.Markdown("4. Select the radio button `percentage_plot` or `word_count_plot`. Click on the `Vizualize data` button to view the respective visualization. If needed, click the `Clear` button to clear the plot.")
+    gr.Markdown("NOTE: Each step depends on the file saved by the previous step, so the sequence is important. For example, you can't get the data viz unless you have the sentiment-analyzed file.")
+
 
+    # GRADIO ROW FOR NEWS COLLECTOR
     with gr.Row():
         with gr.Column(scale=1, min_width=600):
             ui_domain = gr.Dropdown(["bbc", "forbes", "businessinsider_us"], label="Select Domain")
@@ -339,6 +499,7 @@
 
     retrieve_button.click(call_functions, inputs=ui_domain, outputs=df_output)
 
+    # GRADIO ROW FOR ANALYSING SENTIMENT
     with gr.Row():
         with gr.Column(scale=1, min_width=600):
             ui_input = gr.Textbox(value='combined_news_response.csv' , visible=False)
@@ -347,5 +508,17 @@
 
     view_sentiment_bttn.click(sentiment_analyzer, inputs=ui_input, outputs=df_output)
 
+    with gr.Row():
+        with gr.Column(scale=1, min_width=600):
+            ui_plot_type = gr.Radio(label="Plot type",
+                                    choices=["percentage_plot", "word_count_plot"],
+                                    value='percentage_plot')
+
+            data_viz_bt = gr.Button("Vizualize data")
+
+            plt_output = gr.Plot(label="Data Vizualizer for the News App", show_label=True)
+            gr.ClearButton(plt_output)
+            data_viz_bt.click(call_data_viz_func, inputs=ui_plot_type, outputs=plt_output)
+
 
 demo.launch(debug=True)