"""Streamlit dashboard over reviewed English/Tamil social-media and news data.

The script loads four CSV files, normalizes and filters them, merges them into
one dataframe, and renders Plotly charts on a page chosen in the sidebar.
"""

import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import seaborn as sns
import streamlit as st
from sklearn.feature_extraction.text import CountVectorizer

# Set page configuration (must run before any other Streamlit command).
st.set_page_config(layout="wide")

# Read data into dataframes.
df1 = pd.read_csv("data/reviewed_social_media_english.csv")
df2 = pd.read_csv("data/reviewed_news_english.csv")
df3 = pd.read_csv("data/tamil_social_media.csv")
df4 = pd.read_csv("data/tamil_news.csv")

# Normalize text. Assign the result back to the column instead of calling
# `.replace(..., inplace=True)` on the selected column: the chained in-place
# form is not guaranteed to write through to the parent frame.
for _frame in (df1, df2, df3, df4):
    _frame['Domain'] = _frame['Domain'].replace("MUSLIM", "Muslim")


def _drop_irrelevant(frame):
    """Return the rows of *frame* that carry usable labels.

    Rows are dropped when `Domain` is 'Not relevant' or 'None', or when
    `Discrimination` or `Sentiment` is 'None'.
    """
    # NOTE(review): depending on the pandas version, read_csv may already have
    # parsed the literal text "None" as NaN, in which case these comparisons
    # do not remove those rows -- confirm against the data files.
    keep = (
        (frame['Domain'] != 'Not relevant')
        & (frame['Domain'] != 'None')
        & (frame['Discrimination'] != 'None')
        & (frame['Sentiment'] != 'None')
    )
    return frame[keep]


# Drop irrelevant data. The filtered frames are collected into a new list;
# rebinding a loop variable (`for df in frames: df = ...`) would leave the
# list contents untouched and the filter would silently do nothing.
frames = [_drop_irrelevant(frame) for frame in (df1, df2, df3, df4)]

# Concatenate/merge dataframes. A fresh index avoids duplicate row labels
# coming from the four source files.
df = pd.concat(frames, ignore_index=True)


# Visualization function
def create_visualizations(df):
    """Placeholder kept for compatibility; it currently renders nothing."""
    # [Existing visualization code]
    pass


# Page navigation. The page-specific rendering happens in render_dashboard().
page = st.sidebar.selectbox(
    "Choose a page",
    ["Overview", "Sentiment Analysis", "Discrimination Analysis", "Channel Analysis"],
)

# Define a color palette for consistent visualization styles
color_palette = px.colors.sequential.Viridis


# Function for Domain Distribution Chart
def create_domain_distribution_chart(df):
    """Return a donut chart showing the share of rows per `Domain` value."""
    fig = px.pie(df, names='Domain', title='Distribution of Domains', hole=0.35)
    fig.update_layout(
        title_x=0.5,
        margin=dict(l=20, r=20, t=30, b=20),
        legend=dict(x=0.1, y=1),
    )
    fig.update_traces(marker=dict(colors=color_palette))
    return fig


# Function for Sentiment Distribution Across Domains Chart
def create_sentiment_distribution_chart(df):
    """Return a grouped bar chart of row counts per `Domain`, split by `Sentiment`."""
    # NOTE(review): the original chart code was elided and `fig` was never
    # created, which raised NameError. This grouped count chart is a stand-in;
    # replace it if the original design differed.
    fig = px.histogram(
        df,
        x='Domain',
        color='Sentiment',
        barmode='group',
        title='Sentiment Distribution Across Domains',
    )
    fig.update_layout(margin=dict(l=20, r=20, t=40, b=20))
    return fig


# ... [Define other chart functions following the same pattern]


# Function for Channel-wise Sentiment Over Time Chart
def create_channel_sentiment_over_time_chart(df):
    """Return a line chart of monthly row counts per `Channel` and `Sentiment`.

    `Date` is parsed with `pd.to_datetime` and truncated to the start of its
    month. The input dataframe is not modified.
    """
    # `assign` builds a new frame, so the caller's `Date` column keeps its
    # original values.
    month_start = pd.to_datetime(df['Date']).dt.to_period('M').dt.to_timestamp()
    monthly_counts = (
        df.assign(Date=month_start)
        .groupby(['Date', 'Channel', 'Sentiment'])
        .size()
        .reset_index(name='Count')
    )
    # Long-form data lets Plotly handle whichever sentiment labels are present
    # instead of requiring fixed 'Positive'/'Negative'/'Neutral' columns.
    fig = px.line(
        monthly_counts,
        x='Date',
        y='Count',
        color='Channel',
        line_dash='Sentiment',
    )
    fig.update_layout(
        title='Channel-wise Sentiment Over Time',
        margin=dict(l=20, r=20, t=40, b=20),
    )
    return fig


# Function for Channel-wise Distribution of Discriminative Content Chart
def create_channel_discrimination_chart(df):
    """Return a grouped bar chart of row counts per `Channel`, split by `Discrimination`."""
    # Long-form counts avoid a KeyError when a discrimination label is missing
    # from the (possibly filtered) data.
    channel_counts = (
        df.groupby(['Channel', 'Discrimination'])
        .size()
        .reset_index(name='Count')
    )
    fig = px.bar(
        channel_counts,
        x='Channel',
        y='Count',
        color='Discrimination',
        barmode='group',
    )
    fig.update_layout(
        title='Channel-wise Distribution of Discriminative Content',
        margin=dict(l=20, r=20, t=40, b=20),
    )
    return fig


# Dashboard Layout
def render_dashboard(data):
    """Render the charts for the page selected in the sidebar.

    Args:
        data: The dataframe to plot, already narrowed by the sidebar filters.
    """
    if data.empty:
        st.warning("No data matches the selected filters.")
        return

    # `st.beta_columns` was renamed to `st.columns`; prefer the new name and
    # fall back to the old one on Streamlit versions that predate the rename.
    make_columns = getattr(st, "columns", None) or st.beta_columns

    # Overview page layout
    if page == "Overview":
        st.header("Overview of Domains and Sentiments")
        col1, col2 = make_columns(2)
        with col1:
            st.plotly_chart(create_domain_distribution_chart(data))
        with col2:
            st.plotly_chart(create_sentiment_distribution_chart(data))
        # ... [Additional overview charts]
    elif page == "Sentiment Analysis":
        st.header("Sentiment Analysis")
        st.plotly_chart(create_sentiment_distribution_chart(data))
    elif page == "Discrimination Analysis":
        st.header("Discrimination Analysis")
        st.plotly_chart(create_channel_discrimination_chart(data))
    elif page == "Channel Analysis":
        st.header("Channel Analysis")
        st.plotly_chart(create_channel_sentiment_over_time_chart(data))
        st.plotly_chart(create_channel_discrimination_chart(data))


# Sidebar Filters
domain_filter = st.sidebar.multiselect(
    'Select Domain',
    options=df['Domain'].unique(),
    default=df['Domain'].unique(),
)
channel_filter = st.sidebar.multiselect(
    'Select Channel',
    options=df['Channel'].unique(),
    default=df['Channel'].unique(),
)
sentiment_filter = st.sidebar.multiselect(
    'Select Sentiment',
    options=df['Sentiment'].unique(),
    default=df['Sentiment'].unique(),
)
discrimination_filter = st.sidebar.multiselect(
    'Select Discrimination',
    options=df['Discrimination'].unique(),
    default=df['Discrimination'].unique(),
)

# Apply the filters
df_filtered = df[
    df['Domain'].isin(domain_filter)
    & df['Channel'].isin(channel_filter)
    & df['Sentiment'].isin(sentiment_filter)
    & df['Discrimination'].isin(discrimination_filter)
]

# Render the dashboard with filtered data
render_dashboard(df_filtered)