menikev committed on
Commit
c298690
1 Parent(s): 863daa6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -55
app.py CHANGED
@@ -36,7 +36,6 @@ def load_and_clean_data():
36
 
37
  df = load_and_clean_data()
38
 
39
-
40
  # Page navigation setup
41
  page_names = ["Dashboard for GESI Conversation in Sri Lanka", "GESI Overview", "Sentiment Analysis", "Discrimination Analysis", "Channel Analysis"]
42
  page = st.sidebar.selectbox("Choose a page", page_names)
@@ -63,49 +62,49 @@ color_palette = px.colors.sequential.Viridis
63
 
64
  # Function to render the model prediction visualization page
65
  def render_prediction_page():
66
- st.title("Dashboard for GESI Conversations in Sri Lanka")
67
- st.write("""
68
- Instant Analysis: Enter any text snippet and get immediate predictions from out model train on English, Sinhala and Tamil based languages \n\n
69
- Domain Identification: Discover the subject matter of your text with a quantifiable domain score. """)
70
-
71
- # User input text area
72
- user_input = st.text_are("Enter Text/Content here to analyze", height=150)
73
-
74
- if st.button("Perfrom contextual Analysis"):
75
- # Use run_pipeline to get predictions
76
- predictions = run_pipeline(user_input)
77
-
78
- # Extract prediction details
79
- domain_label = prediction.get("domain_label", "Unknown")
80
- domain_score = prediction.get("domain_socre", 0)
81
- discrimination_label = prediction.get("discrimination_label", "Unknown")
82
- discrimination_score = prediction.get("discrimination_score", 0)
83
 
84
- # Visualization layout
85
- col1, col2 = st.columns(2)
86
 
87
- with col1:
88
- st.markdown("#### Domain Label")
89
- st.markdown(f"## {domain_label}")
90
- st.progress(domain_score)
91
-
92
- with col2:
93
- st.makrdown("#### Discrimination Label")
94
- st.markdown(f"## {discrimination_label}")
95
- st.progress(domain_score)
96
-
97
- col3, col4 = st.columns(2)
98
-
99
- with col3:
100
- # Display Domain Score in Bold
101
- st.markdown(f'**Domain Score: {domain_score:.2f}**', unsafe_allow_html=True)
102
-
103
- with col4:
104
- # Display Discrimination Score in Bold
105
- st.markdown(f'**Discrimination Score: {discrimination_score:.2f}**', unsafe_allow_html=True)
106
-
107
-
108
- # Visualisation for Domain Distribution
 
 
 
 
 
 
 
 
 
 
 
 
109
  def create_pie_chart(df, column, title):
110
  fig = px.pie(df, names=column, title=title, hole=0.35)
111
  fig.update_layout(margin=dict(l=20, r=20, t=30, b=20), legend=dict(x=0.1, y=1), font=dict(size=12))
@@ -132,16 +131,15 @@ def create_sentiment_distribution_chart(df):
132
  fig.update_layout(margin=dict(l=20, r=20, t=50, b=20), xaxis_title="Domain", yaxis_title="Counts", font=dict(size=10))
133
  return fig
134
 
135
-
136
  # Visualization for Correlation between Sentiment and Discrimination
137
  def create_sentiment_discrimination_grouped_chart(df):
138
  # Creating a crosstab of 'Sentiment' and 'Discrimination'
139
  crosstab_df = pd.crosstab(df['Sentiment'], df['Discrimination'])
140
 
141
- # Check if 'Yes' and 'No' are in the columns after the crosstab operation
142
  value_vars = crosstab_df.columns.intersection(['Discriminative', 'Non Discriminative']).tolist()
143
 
144
- # If 'No' is not in columns, it will not be included in melting
145
  melted_df = pd.melt(crosstab_df.reset_index(), id_vars='Sentiment', value_vars=value_vars, var_name='Discrimination', value_name='Count')
146
 
147
  # Proceeding to plot only if we have data to plot
@@ -152,8 +150,6 @@ def create_sentiment_discrimination_grouped_chart(df):
152
  else:
153
  return "No data to display for the selected filters."
154
 
155
-
156
-
157
  # Function for Top Domains with Negative Sentiment Chart
158
  def create_top_negative_sentiment_domains_chart(df):
159
  domain_counts = df.groupby(['Domain', 'Sentiment']).size().unstack(fill_value=0)
@@ -164,7 +160,7 @@ def create_top_negative_sentiment_domains_chart(df):
164
  colors = ['limegreen', 'crimson', 'darkcyan']
165
  fig = px.bar(domain_counts_subset, x='Count', y='Domain', title='Top Domains with Negative Sentiment', color='Domain',
166
  orientation='h', color_discrete_sequence=colors)
167
- fig.update_layout(margin=dict(l=20, r=20, t=50, b=20), xaxis_title="Negative sentiment content Count", yaxis_title="Domain", font=dict(size=10))
168
  return fig
169
 
170
  # Function for Key Phrases in Negative Sentiment Content Chart
@@ -174,7 +170,7 @@ def create_key_phrases_negative_sentiment_chart(df):
174
  count_values = trigrams.toarray().sum(axis=0)
175
  ngram_freq = pd.DataFrame(sorted([(count_values[i], k) for k, i in cv.vocabulary_.items()], reverse=True))
176
  ngram_freq.columns = ['frequency', 'ngram']
177
- fig = px.bar(ngram_freq.head(10), x='frequency', y='ngram', orientation='h', title='Key phrases in Negative Sentiment Content')
178
  fig.update_layout(margin=dict(l=20, r=20, t=50, b=20), xaxis_title="Frequency", yaxis_title="Trigram", font=dict(size=10))
179
  return fig
180
 
@@ -195,14 +191,13 @@ def create_key_phrases_positive_sentiment_chart(df):
195
  ngram_freq.columns = ['frequency', 'ngram']
196
 
197
  # Create the bar chart
198
- fig = px.bar(ngram_freq.head(10), x='frequency', y='ngram', orientation='h', title='Key phrases in Positive Sentiment Content')
199
 
200
  # Update layout settings
201
  fig.update_layout(margin=dict(l=20, r=20, t=50, b=20), xaxis_title="Frequency", yaxis_title="Trigram", font=dict(size=10))
202
 
203
  return fig
204
 
205
-
206
  # Function for Prevalence of Discriminatory Content Chart
207
  def create_prevalence_discriminatory_content_chart(df):
208
  domain_counts = df.groupby(['Domain', 'Discrimination']).size().unstack(fill_value=0)
@@ -226,7 +221,7 @@ def create_top_discriminatory_domains_chart(df):
226
  def create_sentiment_distribution_by_channel_chart(df):
227
  sentiment_by_channel = df.groupby(['Channel', 'Sentiment']).size().reset_index(name='counts')
228
  color_map = {'Positive': 'blue', 'Neutral': 'lightblue', 'Negative': 'red'}
229
- fig = px.bar(sentiment_by_channel, x='Channel', y='counts', color='Sentiment', title="Sentiment Distribution by Channel", barmode='group', color_discret>
230
  fig.update_layout(margin=dict(l=20, r=20, t=50, b=20), xaxis_title="Channel", yaxis_title="Counts", font=dict(size=10), title_x=0.5)
231
  return fig
232
 
@@ -237,13 +232,12 @@ def create_channel_discrimination_chart(df):
237
  fig.update_layout(title='Channel-wise Distribution of Discriminative Content', margin=dict(l=20, r=20, t=50, b=20), font=dict(size=10), title_x=0.5)
238
  return fig
239
 
240
-
241
  # Function for rendering dashboard
242
  def render_dashboard(page, df_filtered):
243
  if page == "Dashboard for GESI Conversations in Sri Lanka":
244
  render_prediction_page()
245
  elif page == "GESI Overview":
246
- st.title(" GESI Overview Dashboard")
247
  col1, col2 = st.columns(2)
248
  with col1:
249
  st.plotly_chart(create_pie_chart(df_filtered, 'Domain', 'Distribution of Domains'))
@@ -290,6 +284,5 @@ def render_dashboard(page, df_filtered):
290
  with col2:
291
  st.plotly_chart(create_channel_discrimination_chart(df_filtered))
292
 
293
-
294
  # Render the selected dashboard page
295
  render_dashboard(page, df_filtered)
 
36
 
37
  df = load_and_clean_data()
38
 
 
39
  # Page navigation setup
40
  page_names = ["Dashboard for GESI Conversation in Sri Lanka", "GESI Overview", "Sentiment Analysis", "Discrimination Analysis", "Channel Analysis"]
41
  page = st.sidebar.selectbox("Choose a page", page_names)
 
62
 
63
  # Function to render the model prediction visualization page
64
  def render_prediction_page():
65
+ st.title("Dashboard for GESI Conversations in Sri Lanka")
66
+ st.write("""
67
+ Instant Analysis: Enter any text snippet and get immediate predictions from our model trained on English, Sinhala, and Tamil languages.\n\n
68
+ Domain Identification: Discover the subject matter of your text with a quantifiable domain score.
69
+ """)
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
+ # User input text area
72
+ user_input = st.text_area("Enter Text/Content here to analyze", height=150)
73
 
74
+ if st.button("Perform Contextual Analysis"):
75
+ # Use run_pipeline to get predictions
76
+ predictions = run_pipeline(user_input)
77
+
78
+ # Extract prediction details
79
+ domain_label = predictions.get("domain_label", "Unknown")
80
+ domain_score = predictions.get("domain_score", 0)
81
+ discrimination_label = predictions.get("discrimination_label", "Unknown")
82
+ discrimination_score = predictions.get("discrimination_score", 0)
83
+
84
+ # Visualization layout
85
+ col1, col2 = st.columns(2)
86
+
87
+ with col1:
88
+ st.markdown("#### Domain Label")
89
+ st.markdown(f"## {domain_label}")
90
+ st.progress(domain_score)
91
+
92
+ with col2:
93
+ st.markdown("#### Discrimination Label")
94
+ st.markdown(f"## {discrimination_label}")
95
+ st.progress(discrimination_score)
96
+
97
+ col3, col4 = st.columns(2)
98
+
99
+ with col3:
100
+ # Display Domain Score in Bold
101
+ st.markdown(f'**Domain Score: {domain_score:.2f}**', unsafe_allow_html=True)
102
+
103
+ with col4:
104
+ # Display Discrimination Score in Bold
105
+ st.markdown(f'**Discrimination Score: {discrimination_score:.2f}**', unsafe_allow_html=True)
106
+
107
+ # Visualization for Domain Distribution
108
  def create_pie_chart(df, column, title):
109
  fig = px.pie(df, names=column, title=title, hole=0.35)
110
  fig.update_layout(margin=dict(l=20, r=20, t=30, b=20), legend=dict(x=0.1, y=1), font=dict(size=12))
 
131
  fig.update_layout(margin=dict(l=20, r=20, t=50, b=20), xaxis_title="Domain", yaxis_title="Counts", font=dict(size=10))
132
  return fig
133
 
 
134
  # Visualization for Correlation between Sentiment and Discrimination
135
  def create_sentiment_discrimination_grouped_chart(df):
136
  # Creating a crosstab of 'Sentiment' and 'Discrimination'
137
  crosstab_df = pd.crosstab(df['Sentiment'], df['Discrimination'])
138
 
139
+ # Check if 'Discriminative' and 'Non Discriminative' are in the columns after the crosstab operation
140
  value_vars = crosstab_df.columns.intersection(['Discriminative', 'Non Discriminative']).tolist()
141
 
142
+ # If 'Non Discriminative' is not in columns, it will not be included in melting
143
  melted_df = pd.melt(crosstab_df.reset_index(), id_vars='Sentiment', value_vars=value_vars, var_name='Discrimination', value_name='Count')
144
 
145
  # Proceeding to plot only if we have data to plot
 
150
  else:
151
  return "No data to display for the selected filters."
152
 
 
 
153
  # Function for Top Domains with Negative Sentiment Chart
154
  def create_top_negative_sentiment_domains_chart(df):
155
  domain_counts = df.groupby(['Domain', 'Sentiment']).size().unstack(fill_value=0)
 
160
  colors = ['limegreen', 'crimson', 'darkcyan']
161
  fig = px.bar(domain_counts_subset, x='Count', y='Domain', title='Top Domains with Negative Sentiment', color='Domain',
162
  orientation='h', color_discrete_sequence=colors)
163
+ fig.update_layout(margin=dict(l=20, r=20, t=50, b=20), xaxis_title="Negative Sentiment Content Count", yaxis_title="Domain", font=dict(size=10))
164
  return fig
165
 
166
  # Function for Key Phrases in Negative Sentiment Content Chart
 
170
  count_values = trigrams.toarray().sum(axis=0)
171
  ngram_freq = pd.DataFrame(sorted([(count_values[i], k) for k, i in cv.vocabulary_.items()], reverse=True))
172
  ngram_freq.columns = ['frequency', 'ngram']
173
+ fig = px.bar(ngram_freq.head(10), x='frequency', y='ngram', orientation='h', title='Key Phrases in Negative Sentiment Content')
174
  fig.update_layout(margin=dict(l=20, r=20, t=50, b=20), xaxis_title="Frequency", yaxis_title="Trigram", font=dict(size=10))
175
  return fig
176
 
 
191
  ngram_freq.columns = ['frequency', 'ngram']
192
 
193
  # Create the bar chart
194
+ fig = px.bar(ngram_freq.head(10), x='frequency', y='ngram', orientation='h', title='Key Phrases in Positive Sentiment Content')
195
 
196
  # Update layout settings
197
  fig.update_layout(margin=dict(l=20, r=20, t=50, b=20), xaxis_title="Frequency", yaxis_title="Trigram", font=dict(size=10))
198
 
199
  return fig
200
 
 
201
  # Function for Prevalence of Discriminatory Content Chart
202
  def create_prevalence_discriminatory_content_chart(df):
203
  domain_counts = df.groupby(['Domain', 'Discrimination']).size().unstack(fill_value=0)
 
221
  def create_sentiment_distribution_by_channel_chart(df):
222
  sentiment_by_channel = df.groupby(['Channel', 'Sentiment']).size().reset_index(name='counts')
223
  color_map = {'Positive': 'blue', 'Neutral': 'lightblue', 'Negative': 'red'}
224
+ fig = px.bar(sentiment_by_channel, x='Channel', y='counts', color='Sentiment', title="Sentiment Distribution by Channel", barmode='group', color_discrete_map=color_map)
225
  fig.update_layout(margin=dict(l=20, r=20, t=50, b=20), xaxis_title="Channel", yaxis_title="Counts", font=dict(size=10), title_x=0.5)
226
  return fig
227
 
 
232
  fig.update_layout(title='Channel-wise Distribution of Discriminative Content', margin=dict(l=20, r=20, t=50, b=20), font=dict(size=10), title_x=0.5)
233
  return fig
234
 
 
235
  # Function for rendering dashboard
236
  def render_dashboard(page, df_filtered):
237
  if page == "Dashboard for GESI Conversations in Sri Lanka":
238
  render_prediction_page()
239
  elif page == "GESI Overview":
240
+ st.title("GESI Overview Dashboard")
241
  col1, col2 = st.columns(2)
242
  with col1:
243
  st.plotly_chart(create_pie_chart(df_filtered, 'Domain', 'Distribution of Domains'))
 
284
  with col2:
285
  st.plotly_chart(create_channel_discrimination_chart(df_filtered))
286
 
 
287
  # Render the selected dashboard page
288
  render_dashboard(page, df_filtered)