File size: 9,993 Bytes
210b96e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Lookup table from two-letter postal code to full name.
# Covers the 50 states plus the District of Columbia; territories and minor
# outlying islands are deliberately left out, so their codes are absent here.
state_mapping = {
    'FL': 'Florida',
    'CA': 'California',
    'TX': 'Texas',
    'GA': 'Georgia',
    'NY': 'New York',
    'IL': 'Illinois',
    'PA': 'Pennsylvania',
    'NC': 'North Carolina',
    'NJ': 'New Jersey',
    'MD': 'Maryland',
    'VA': 'Virginia',
    'OH': 'Ohio',
    'MI': 'Michigan',
    'SC': 'South Carolina',
    'AZ': 'Arizona',
    'TN': 'Tennessee',
    'NV': 'Nevada',
    'LA': 'Louisiana',
    'AL': 'Alabama',
    'MO': 'Missouri',
    'MA': 'Massachusetts',
    'IN': 'Indiana',
    'AR': 'Arkansas',
    'WA': 'Washington',
    'CO': 'Colorado',
    'MS': 'Mississippi',
    'CT': 'Connecticut',
    'MN': 'Minnesota',
    'WI': 'Wisconsin',
    'KY': 'Kentucky',
    'UT': 'Utah',
    'DE': 'Delaware',
    'OR': 'Oregon',
    'OK': 'Oklahoma',
    'DC': 'District of Columbia',
    'KS': 'Kansas',
    'IA': 'Iowa',
    'NM': 'New Mexico',
    'NE': 'Nebraska',
    'HI': 'Hawaii',
    'RI': 'Rhode Island',
    'ID': 'Idaho',
    'WV': 'West Virginia',
    'NH': 'New Hampshire',
    'ME': 'Maine',
    'MT': 'Montana',
    'ND': 'North Dakota',
    'AK': 'Alaska',
    'SD': 'South Dakota',
    'WY': 'Wyoming',
    'VT': 'Vermont',
}

# Horizontal bar chart of the n most frequent values in a column
def plot_top_n(df, column, title, n=5, palette_name=None):
    """Build a horizontal bar chart of the ``n`` most frequent values of ``column``.

    Args:
        df: DataFrame holding the data to summarise.
        column: Name of the column whose values are counted.
        title: Currently unused; the figure is created without a title.
        n: How many of the most frequent values to show.
        palette_name: Optional seaborn palette name. When given, ``n`` colours
            from that palette are passed to Plotly as the discrete colour
            sequence; when falsy, no explicit sequence is passed.

    Returns:
        The Plotly figure, with its legend hidden.
    """
    if palette_name:
        bar_colors = sns.color_palette(palette_name, n_colors=n).as_hex()
    else:
        bar_colors = None

    # value_counts() sorts by frequency (descending), so the first n entries
    # are the most common ones.
    most_common = df[column].value_counts().head(n).reset_index()
    # Renamed explicitly so the frame always has the value column and 'Count',
    # whatever names reset_index() produced.
    most_common.columns = [column, 'Count']

    # One colour per category; the legend would only repeat the axis labels.
    fig = px.bar(
        most_common,
        x='Count',
        y=column,
        orientation='h',
        color=column,
        color_discrete_sequence=bar_colors,
    )
    fig.update_layout(showlegend=False)
    return fig

# 1. Top 5 most common products
def plot_top_5_products(df_new):
    """Return the bar chart of the most frequent ``Product`` values in ``df_new``.

    Delegates to ``plot_top_n`` with its default ``n`` and default colours.
    """
    return plot_top_n(df_new, column='Product', title='Top 5 Most Common Products')

# 2. Top 5 most common issues
def plot_top_5_issues(df_new):
    """Return the bar chart of the most frequent ``Issue`` values in ``df_new``.

    Delegates to ``plot_top_n`` with its default ``n``, coloured with the
    seaborn 'plasma' palette.
    """
    return plot_top_n(
        df_new,
        column='Issue',
        title='Top 5 Most Common Issues',
        palette_name='plasma',
    )

# 3. Plotting top 5 issues in each product category
def plot_top_5_issues_in_product(df_new):
    """Build a stacked bar chart of the five most frequent issues per product.

    Args:
        df_new: DataFrame with at least the columns 'Product' and 'Issue',
            one row per complaint.

    Returns:
        A Plotly figure with one stacked bar per product (products ordered by
        their total complaint count, largest first), one segment per issue,
        and the legend hidden.
    """
    # Step 1: number of complaints for every (Product, Issue) pair.
    issue_counts = df_new.groupby(['Product', 'Issue']).size().reset_index(name='Count')

    # Total complaints per product, largest first; this drives the bar order.
    product_totals = (
        issue_counts.groupby('Product')['Count']
        .sum()
        .reset_index(name='TotalIssues')
        .sort_values('TotalIssues', ascending=False)
    )

    # Step 2: keep the five largest counts within each product.
    # Sorting once and taking groupby().head(5) avoids
    # groupby().apply(nlargest), which operates on the grouping column;
    # pandas deprecated that, and newer versions exclude the grouping column
    # from what apply() receives, which would lose 'Product' before the merge
    # below. Ties keep their original (Issue-sorted) order, as nlargest's
    # default keep='first' does.
    top_issues = (
        issue_counts.sort_values(by=['Product', 'Count'], ascending=[True, False])
        .groupby('Product')
        .head(5)
        .reset_index(drop=True)
    )

    # Attach each product's total so rows can be ordered by it.
    top_issues = top_issues.merge(product_totals, on='Product')
    top_issues = top_issues.sort_values(
        by=['TotalIssues', 'Count'], ascending=[False, False]
    )

    # Step 3: vertical stacked bar chart, products in descending-total order.
    fig = px.bar(
        top_issues,
        x='Product',
        y='Count',
        color='Issue',
        labels={'Count': 'Number of Complaints'},
        category_orders={'Product': product_totals['Product'].tolist()},
    )

    # Hide the legend and fix the figure size.
    fig.update_layout(showlegend=False, width=900, height=600)
    return fig

# 4. Companies with the most complaints in 2023
def plot_top_10_companies_complaints(df_new):
    """Build a horizontal bar chart of the ten companies with most 2023 complaints.

    Args:
        df_new: DataFrame with a datetime-like 'Date received' column (the
            ``.dt`` accessor is used on it) and a 'Company' column.

    Returns:
        A Plotly figure whose bars are coloured by complaint count on a
        green-yellow-red scale, with the colour bar hidden.
    """
    received_year = df_new['Date received'].dt.year
    complaints_2023 = df_new.loc[received_year == 2023]

    # value_counts() is sorted descending, so head(10) is the top ten; they are
    # then re-sorted ascending for plotting (presumably so the largest bar is
    # drawn at the top of the horizontal chart).
    ranked = complaints_2023['Company'].value_counts().head(10).sort_values(ascending=True)

    # Yellow sits at 5% of the range, so most of the scale runs yellow -> red.
    green_to_red = [(0.0, "green"), (0.05, "yellow"), (1.0, "red")]

    fig = px.bar(
        x=ranked.values,
        y=ranked.index,
        orientation='h',
        color=ranked.values,  # colour each bar by its complaint count
        color_continuous_scale=green_to_red,
        labels={'x': 'Number of Complaints', 'y': 'Company'},
    )

    fig.update_layout(
        xaxis=dict(title='Number of Complaints'),
        yaxis=dict(tickfont=dict(size=10)),
        height=500,
        width=800,
        # The colour bar is hidden: colour only mirrors the bar length.
        coloraxis_showscale=False,
    )
    return fig

# 5. Top 10 States with the Most Complaints
def plot_top_10_states_most_complaints(df_new):
    """Build a horizontal bar chart of the ten states with the most complaints.

    Args:
        df_new: DataFrame with a 'State' column of two-letter codes. It is
            only read; no column is added to it.

    Returns:
        A Plotly figure with one bar per state (full state names), coloured by
        complaint count on the 'Turbo' scale, with the colour bar hidden.
    """
    # Translate codes to full names on a derived Series instead of writing a
    # 'State Name' column into the caller's DataFrame. Codes missing from
    # state_mapping become NaN and are dropped by value_counts().
    state_names = df_new['State'].map(state_mapping)

    # value_counts() is sorted descending, so head() yields the largest counts.
    top_n = 10
    top_states = state_names.value_counts().head(top_n)

    fig = px.bar(
        x=top_states.values,
        y=top_states.index,
        orientation='h',
        color=top_states.values,  # colour each bar by its complaint count
        color_continuous_scale='Turbo',
        labels={'x': 'Number of Complaints', 'y': 'State'},
        # Pin the order of the state axis to the ranking computed above.
        category_orders={'y': top_states.index.tolist()},
    )

    fig.update_layout(
        yaxis=dict(tickfont=dict(size=10)),
        xaxis=dict(tickangle=0),
        height=500,
        width=900,
        # The colour bar is hidden: colour only mirrors the bar length.
        coloraxis_showscale=False,
    )
    return fig

# 6. Top 10 States with the Least Complaints
def plot_top_10_states_least_complaints(df_new):
    """Build a horizontal bar chart of the ten states with the fewest complaints.

    Only states that appear at least once in ``df_new`` can be ranked; a state
    with no complaints at all has no entry in the counts.

    Args:
        df_new: DataFrame with a 'State' column of two-letter codes. It is
            only read; no column is added to it.

    Returns:
        A Plotly figure with one bar per state (full state names), coloured by
        complaint count on the 'Temps' scale, with the colour bar hidden.
    """
    # Translate codes to full names on a derived Series instead of writing a
    # 'State Name' column into the caller's DataFrame. Codes missing from
    # state_mapping become NaN and are dropped by value_counts().
    state_names = df_new['State'].map(state_mapping)

    # value_counts() is sorted descending, so tail() yields the smallest counts.
    top_n = 10
    top_states = state_names.value_counts().tail(top_n)

    fig = px.bar(
        x=top_states.values,
        y=top_states.index,
        orientation='h',
        color=top_states.values,  # colour each bar by its complaint count
        color_continuous_scale='Temps',
        labels={'x': 'Number of Complaints', 'y': 'State'},
        # The ordering belongs on the state axis ('y'); keying it on 'x' (the
        # numeric counts) ordered nothing. Plotly Express lists y categories
        # top-down, so the reversed list puts the state with the fewest
        # complaints first, i.e. at the top of the chart.
        category_orders={'y': top_states.index[::-1].tolist()},
    )

    fig.update_layout(
        yaxis=dict(tickfont=dict(size=10)),
        xaxis=dict(tickangle=0),
        height=500,
        width=900,
        # The colour bar is hidden: colour only mirrors the bar length.
        coloraxis_showscale=False,
    )
    return fig

# 7. Number of complaints over time (monthly totals)
def complaints_by_year(df_new):
    """Build a line chart of complaint counts per calendar month, excluding 2024.

    Args:
        df_new: DataFrame with a datetime-like 'Date received' column (the
            ``.dt`` accessor is used on it). It is not modified.

    Returns:
        A Plotly line figure with one point per 'YYYY-MM' month; the x axis is
        labelled 'Year' and the y axis 'Number of Complaints'.
    """
    # Rows received in 2024 are left out of the trend.
    outside_2024 = df_new['Date received'].dt.year != 2024

    # assign() returns a new frame, so the caller's DataFrame is untouched.
    dated = df_new.loc[outside_2024].assign(
        MonthYear=lambda frame: frame['Date received'].dt.to_period('M').astype(str)
    )
    per_month = dated.groupby('MonthYear').size().reset_index(name="NumComplaints")

    fig = px.line(
        per_month,
        x='MonthYear',
        y='NumComplaints',
        labels={'MonthYear': 'Year', 'NumComplaints': 'Number of Complaints'},
    )
    fig.update_layout(width=900, height=400)
    return fig

# 8. Number of complaints by state (2023 choropleth)
def complaints_across_states(df_new):
    """Build a US choropleth of 2023 complaint counts per state.

    Args:
        df_new: DataFrame with a datetime-like 'Date received' column (the
            ``.dt`` accessor is used on it) and a 'State' column of codes.

    Returns:
        A Plotly choropleth figure coloured by complaint count on the
        'Inferno' scale, with each state's code drawn as a text label and its
        full name (where known) used as the hover title.
    """
    in_2023 = df_new['Date received'].dt.year == 2023
    per_state = df_new.loc[in_2023].groupby('State').size().reset_index(name='Num_complaints')

    # Codes missing from state_mapping fall back to the code itself.
    per_state['Full_state_name'] = per_state['State'].apply(
        lambda code: state_mapping.get(code, code)
    )

    fig = px.choropleth(
        per_state,
        locations='State',
        locationmode='USA-states',
        color='Num_complaints',
        color_continuous_scale='Inferno',
        scope="usa",
        hover_name='Full_state_name',
    )

    # Text-only overlay that prints each state's code on the map; hover is
    # skipped so it does not compete with the choropleth's own tooltip.
    fig.add_scattergeo(
        locations=per_state['State'],
        locationmode='USA-states',
        text=per_state['State'],
        mode='text',
        hoverinfo='skip',
        textfont=dict(size=8.5, color='white'),
    )

    # Grey land on a white background.
    geo_style = dict(
        landcolor='rgb(217, 217, 217)',
        lakecolor='rgb(255, 255, 255)',
        bgcolor='rgb(255, 255, 255)',
    )
    fig.update_layout(
        autosize=True,
        geo=geo_style,
        paper_bgcolor='rgb(255, 255, 255)',
        margin={"r": 0, "t": 50, "l": 0, "b": 0},
        width=1000,
        height=400,
    )
    return fig