File size: 5,674 Bytes
21ac434
7bf8be4
 
 
 
 
 
 
 
 
 
 
93b4f33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7bf8be4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93b4f33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7bf8be4
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer
import seaborn as sns
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go

# Configure the Streamlit page to use the "wide" layout.
st.set_page_config(layout="wide")


def load_and_clean_data():
    """Load the four review CSV files, clean each one and combine them.

    For every file the ``Domain`` label "MUSLIM" is normalized to "Muslim",
    and rows are removed when ``Domain`` is 'Not relevant' or 'None', or when
    ``Discrimination`` or ``Sentiment`` is 'None'.

    Returns:
        pandas.DataFrame: the cleaned rows of all four files, concatenated in
        file order with a fresh 0..n-1 index.
    """
    csv_paths = (
        "data/reviewed_social_media_english.csv",
        "data/reviewed_news_english.csv",
        "data/tamil_social_media.csv",
        "data/tamil_news.csv",
    )

    cleaned_frames = []
    for csv_path in csv_paths:
        frame = pd.read_csv(csv_path)

        # Assign the result back instead of calling replace(inplace=True) on
        # the selected column: the chained in-place form is not guaranteed to
        # modify the parent frame under pandas Copy-on-Write.
        frame['Domain'] = frame['Domain'].replace("MUSLIM", "Muslim")

        # NOTE(review): newer pandas versions appear to parse the literal text
        # "None" as NaN by default in read_csv, in which case these string
        # comparisons never match -- confirm against the actual CSV contents.
        keep = (
            ~frame['Domain'].isin(['Not relevant', 'None'])
            & (frame['Discrimination'] != 'None')
            & (frame['Sentiment'] != 'None')
        )
        cleaned_frames.append(frame[keep])

    # ignore_index avoids duplicate row labels left over from the source files.
    return pd.concat(cleaned_frames, ignore_index=True)

# Load the cleaned, combined dataset. Text normalization, removal of
# irrelevant rows and concatenation all happen inside load_and_clean_data();
# its per-file frames are local to that function, so they must not be
# referenced (or re-cleaned) here at module level.
df = load_and_clean_data()

# Visualization function
def create_visualizations(df):
    """Placeholder for the page visualizations; currently does nothing.

    Args:
        df: the data to visualize (unused for now).

    Returns:
        None.
    """
    # [Existing visualization code]
    return None

# Page navigation: map each page name to the callable that draws it.
# Every page currently points at the same placeholder renderer.
page_renderers = {
    "Overview": create_visualizations,  # Placeholder for overview visualizations
    "Sentiment Analysis": create_visualizations,  # Placeholder for sentiment analysis visualizations
    "Discrimination Analysis": create_visualizations,  # Placeholder for discrimination analysis visualizations
    "Channel Analysis": create_visualizations,  # Placeholder for channel analysis visualizations
}

page = st.sidebar.selectbox("Choose a page", list(page_renderers))

selected_renderer = page_renderers.get(page)
if selected_renderer is not None:
    selected_renderer(df)

# [Place the rest of the code for the visualizations here]


# Define a color palette for consistent visualization styles.
# This is Plotly Express's sequential Viridis scale; it is applied to the
# slices of the pie chart in create_domain_distribution_chart.
color_palette = px.colors.sequential.Viridis

# Function for Domain Distribution Chart
def create_domain_distribution_chart(df):
    """Build a donut-style pie chart of the ``Domain`` column.

    Args:
        df: data frame containing a ``Domain`` column.

    Returns:
        The Plotly figure, titled 'Distribution of Domains', with its slices
        colored from the module-level ``color_palette``.
    """
    layout_options = {
        "title_x": 0.5,
        "margin": {"l": 20, "r": 20, "t": 30, "b": 20},
        "legend": {"x": 0.1, "y": 1},
    }
    slice_marker = {"colors": color_palette}

    donut = px.pie(
        df,
        names='Domain',
        title='Distribution of Domains',
        hole=0.35,
    )
    donut.update_layout(**layout_options)
    donut.update_traces(marker=slice_marker)
    return donut

# Function for Sentiment Distribution Across Domains Chart
def create_sentiment_distribution_chart(df):
    """Build a grouped bar chart of sentiment counts per domain.

    NOTE(review): the original chart-building code was elided from this
    function (it used ``fig`` without ever creating it, raising NameError).
    The chart below is a reconstruction based on the function's name --
    confirm it matches the intended visualization.

    Args:
        df: data frame containing ``Domain`` and ``Sentiment`` columns.

    Returns:
        The Plotly figure with one bar group per domain and one bar per
        sentiment value.
    """
    # Long-form counts: one row per (Domain, Sentiment) pair.
    counts = (
        df.groupby(['Domain', 'Sentiment'])
        .size()
        .reset_index(name='Count')
    )
    fig = px.bar(
        counts,
        x='Domain',
        y='Count',
        color='Sentiment',
        barmode='group',
        title='Sentiment Distribution Across Domains',
    )
    fig.update_layout(margin=dict(l=20, r=20, t=40, b=20))
    return fig

# ... [Define other chart functions following the same pattern]

# Function for Channel-wise Sentiment Over Time Chart
def create_channel_sentiment_over_time_chart(df):
    """Build a line chart of monthly post counts per channel and sentiment.

    The ``Date`` column is parsed with ``pd.to_datetime`` and bucketed by
    calendar month. Each line is one (Channel, Sentiment) combination:
    channels are distinguished by color and sentiments by dash style.

    Args:
        df: data frame containing ``Date``, ``Channel`` and ``Sentiment``
            columns. It is not modified; the month bucket is computed on a
            copy so a filtered slice can be passed safely.

    Returns:
        The Plotly figure titled 'Channel-wise Sentiment Over Time'.
    """
    # Replace Date with the first day of its month on a copy of the frame.
    month_start = pd.to_datetime(df['Date']).dt.to_period('M').dt.to_timestamp()
    monthly = df.assign(Date=month_start)

    # Long-form counts keep Date/Channel/Sentiment as real columns, which is
    # what px.line needs for x=, color= and line_dash=.
    counts = (
        monthly.groupby(['Date', 'Channel', 'Sentiment'])
        .size()
        .reset_index(name='Count')
    )
    fig = px.line(counts, x='Date', y='Count', color='Channel', line_dash='Sentiment')
    fig.update_layout(title='Channel-wise Sentiment Over Time', margin=dict(l=20, r=20, t=40, b=20))
    return fig

# Function for Channel-wise Distribution of Discriminative Content Chart
def create_channel_discrimination_chart(df):
    """Build a grouped bar chart of discrimination labels per channel.

    Args:
        df: data frame containing ``Channel`` and ``Discrimination`` columns.

    Returns:
        The Plotly figure titled
        'Channel-wise Distribution of Discriminative Content', with one bar
        group per channel and bars for 'Discriminative' and
        'Non-Discriminative' counts.
    """
    plotted_labels = ['Discriminative', 'Non-Discriminative']
    channel_discrimination = (
        df.groupby(['Channel', 'Discrimination'])
        .size()
        .unstack(fill_value=0)
        # Guarantee both plotted columns exist (as zeros) even when the data,
        # e.g. after sidebar filtering, contains only one of the labels;
        # otherwise px.bar fails on the missing column name.
        .reindex(columns=plotted_labels, fill_value=0)
    )
    fig = px.bar(channel_discrimination, x=channel_discrimination.index, y=plotted_labels, barmode='group')
    fig.update_layout(title='Channel-wise Distribution of Discriminative Content', margin=dict(l=20, r=20, t=40, b=20))
    return fig

# Dashboard Layout
def render_dashboard(data=None):
    """Render the layout of the page currently selected in the sidebar.

    Args:
        data: data frame to chart. Defaults to the module-level ``df`` so the
            function can still be called without arguments; the script passes
            the sidebar-filtered frame.

    Only the "Overview" page is implemented here: a header followed by two
    side-by-side charts (domain distribution and sentiment distribution).
    """
    if data is None:
        data = df

    # st.beta_columns was the pre-release name of st.columns and is absent
    # from current Streamlit; fall back to it only on old installations.
    make_columns = getattr(st, "columns", None) or st.beta_columns

    # Overview page layout
    if page == "Overview":
        st.header("Overview of Domains and Sentiments")
        col1, col2 = make_columns(2)
        with col1:
            st.plotly_chart(create_domain_distribution_chart(data))
        with col2:
            st.plotly_chart(create_sentiment_distribution_chart(data))
        # ... [Additional overview charts]

    # ... [Other pages]

# Sidebar filters: collect the distinct values of each categorical column ...
domain_options = df['Domain'].unique()
channel_options = df['Channel'].unique()
sentiment_options = df['Sentiment'].unique()
discrimination_options = df['Discrimination'].unique()

# ... and offer one multiselect per column, with every value preselected.
domain_filter = st.sidebar.multiselect(
    'Select Domain', options=domain_options, default=domain_options
)
channel_filter = st.sidebar.multiselect(
    'Select Channel', options=channel_options, default=channel_options
)
sentiment_filter = st.sidebar.multiselect(
    'Select Sentiment', options=sentiment_options, default=sentiment_options
)
discrimination_filter = st.sidebar.multiselect(
    'Select Discrimination', options=discrimination_options, default=discrimination_options
)

# Keep only the rows whose value is selected in all four filters.
in_domain = df['Domain'].isin(domain_filter)
in_channel = df['Channel'].isin(channel_filter)
in_sentiment = df['Sentiment'].isin(sentiment_filter)
in_discrimination = df['Discrimination'].isin(discrimination_filter)
df_filtered = df[in_domain & in_channel & in_sentiment & in_discrimination]

# Render the dashboard with filtered data
render_dashboard(df_filtered)