File size: 3,137 Bytes
51abf05
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import plotly.express as px
from matplotlib.gridspec import GridSpec

def average_sales_by_region(df):
    """
    Draw a bar chart of mean sales per region, highest mean first.

    Args:
        df: DataFrame providing the ``region`` and ``sales`` columns.

    Returns:
        The matplotlib Figure holding the chart; every bar is labelled
        with its mean value formatted to two decimals.
    """
    region_means = (
        df[['region', 'sales']]
        .groupby('region')
        .mean()
        .sort_values(by='sales', ascending=False)
    )

    fig, ax = plt.subplots(figsize=[10, 6])
    sns.barplot(
        x=region_means.index,
        y='sales',
        data=region_means,
        palette='viridis',
        ax=ax,
    )
    ax.set_title('Average Sales Across Different Regions')
    ax.set_xlabel('Region')
    ax.set_ylabel('Average Sales')

    # Bars sit at integer x positions 0..n-1, in the sorted order above,
    # so the enumerate position doubles as the x coordinate of the label.
    for position, mean_sales in enumerate(region_means['sales']):
        ax.text(position, mean_sales, f"{mean_sales:.2f}", ha='center', va='bottom')

    return fig

def average_sales_and_profit_over_time(df):
    """
    Generate a line plot of monthly average sales and profit.

    Args:
        df: DataFrame providing ``order_date`` (anything accepted by
            ``pd.to_datetime``), ``sales`` and ``profit`` columns. The
            caller's frame is not modified.

    Returns:
        The matplotlib Figure with one line for average sales (green) and
        one for average profit (red), indexed by the first day of each month.
    """
    # Parse the dates first and bucket them by calendar month. No explicit
    # sort is needed: groupby returns the month keys in ascending order, and
    # sorting the raw (possibly string) column would not be chronological.
    order_month = pd.to_datetime(df['order_date']).dt.to_period("M")

    # Aggregate only the two numeric measures so the date column itself is
    # never averaged (or dropped, depending on the pandas version).
    monthly = df[['sales', 'profit']].groupby(order_month).mean()
    monthly.index = monthly.index.to_timestamp()

    fig, ax = plt.subplots(figsize=[10, 6])
    ax.plot(monthly.index, monthly['sales'], color='green', label='Avg Sales')
    ax.plot(monthly.index, monthly['profit'], color='red', label='Avg Profit')
    ax.legend()
    ax.set_title('Average Sales and Profit Over Time (Monthly)')
    ax.set_xlabel('Time')
    ax.set_ylabel('Value')
    return fig

def segment_vs_region_distribution(df):
    """
    Generate a count plot of customer segments, split by region.

    Args:
        df: DataFrame providing the ``segment`` and ``region`` columns.

    Returns:
        The matplotlib Figure containing the grouped count plot.
    """
    # Draw on an explicit Axes, like the other plotting helpers in this
    # module, instead of relying on pyplot's implicit "current figure".
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.countplot(x='segment', data=df, hue='region', palette='viridis', ax=ax)
    ax.set_title('Segment vs. Region Distribution')
    ax.set_xlabel('Segment')
    ax.set_ylabel('Count')
    ax.legend(title='Region')
    return fig

def sales_vs_profit_across_segments(df):
    """
    Draw a sales-versus-profit scatter plot coloured by customer segment.

    Args:
        df: DataFrame providing the ``sales``, ``profit`` and ``segment``
            columns.

    Returns:
        The matplotlib Figure; marker colour encodes ``segment`` and marker
        size scales with ``sales`` between 20 and 200.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.scatterplot(
        data=df,
        x='sales',
        y='profit',
        hue='segment',
        size='sales',
        sizes=(20, 200),
        palette='viridis',
        ax=ax,
    )
    ax.set_title('Sales vs. Profit Across Different Customer Segments')
    ax.set_xlabel('Sales')
    ax.set_ylabel('Profit')
    return fig

def category_composition_for_profit_and_sales(df):
    """
    Draw two side-by-side pie charts of per-category sales and profit totals.

    Args:
        df: DataFrame providing the ``category``, ``sales`` and ``profit``
            columns.

    Returns:
        The matplotlib Figure with the sales pie on the left and the profit
        pie on the right.
    """
    totals = df.groupby('category').agg({'sales': 'sum', 'profit': 'sum'}).reset_index()
    wedge_colors = ['#ff9999', '#66b3ff', '#99ff99', '#ffcc99']

    fig, axs = plt.subplots(1, 2, figsize=(14, 7))

    # One (column, title) pair per panel, in left-to-right order.
    panels = (
        ('sales', 'Sales Composition by Category'),
        ('profit', 'Profit Composition by Category'),
    )
    # NOTE(review): matplotlib's pie is documented to reject negative wedge
    # sizes -- confirm that no category can have a negative profit total.
    for panel_ax, (column, panel_title) in zip(axs, panels):
        panel_ax.pie(
            totals[column],
            labels=totals['category'],
            autopct='%1.1f%%',
            startangle=140,
            colors=wedge_colors,
        )
        panel_ax.set_title(panel_title)

    return fig

# Additional EDA functions can be added following the same pattern