File size: 4,368 Bytes
c4b27ba
 
 
 
3114f5a
c4b27ba
 
 
3114f5a
c4b27ba
 
3114f5a
 
 
 
 
 
 
 
 
c4b27ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3114f5a
 
 
 
 
c4b27ba
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import plotly.graph_objects as go
import plotly.express as px

# ๅˆ†ๆžใƒกใƒ‹ใƒฅใƒผ
analysis_menu_list = ["ๆ™‚็ณปๅˆ—ๅˆ†ๆž","ใƒใ‚นใ‚ฑใƒƒใƒˆๅˆ†ๆž"]

# ๅˆ†ๆžใƒกใƒ‹ใƒฅใƒผใ”ใจใฎSQL
def create_sql(analysis_menu, country, start_date, end_date):
    if analysis_menu == "ๆ™‚็ณปๅˆ—ๅˆ†ๆž":
        sql = f"""
        SELECT
        CAST(InvoiceDate AS DATE) AS YearMonthDate,
        COUNT(DISTINCT CustomerID) AS Num_of_Purchaser,
        SUM(Quantity) AS Total_of_Amount,
        SUM(UnitPrice * Quantity) AS SalesTotal
        FROM df
        WHERE CAST(InvoiceDate AS DATE) BETWEEN DATETIME '{start_date}' AND DATETIME '{end_date}'
        AND {country}
        GROUP BY YearMonthDate
        ORDER BY YearMonthDate
        """

    elif analysis_menu == "ใƒใ‚นใ‚ฑใƒƒใƒˆๅˆ†ๆž":
        sql = f"""
        WITH
        t_all AS(
        -- ็ทๆฅๅบ—่€…ๆ•ฐ
        SELECT
        COUNT(DISTINCT CustomerID) AS Num_of_All
        FROM df
        WHERE CAST(InvoiceDate AS DATE) BETWEEN DATETIME '{start_date}' AND DATETIME '{end_date}'
        AND {country}
        ),

        t_purchaser AS(
        -- ๅ•†ๅ“ใ”ใจใฎ่ณผๅ…ฅ่€…ๆ•ฐ
        SELECT
            CAST(StockCode AS STRING) AS ProductID, Description, COUNT(DISTINCT CustomerID) AS Num_of_Purchaser
        FROM df
        WHERE CAST(InvoiceDate AS DATE) BETWEEN DATETIME '{start_date}' AND DATETIME '{end_date}'
        AND {country}
        GROUP BY ProductID, Description
        ),

        t_simultaneous_purchaser_pre AS(
        -- ๅ•†ๅ“ใ”ใจใฎๅŒๆ™‚่ณผๅ…ฅ่€…
        SELECT
            DISTINCT CAST(StockCode AS STRING) AS ProductID, Description, CAST(InvoiceDate AS DATE) Purchase_date, CustomerID
        FROM df
        WHERE CAST(InvoiceDate AS DATE) BETWEEN DATETIME '{start_date}' AND DATETIME '{end_date}'
        AND {country}
        ),

        t_simultaneous_purchaser AS(
        -- ๅ•†ๅ“ใ”ใจใฎๅŒๆ™‚่ณผๅ…ฅ่€…ๆ•ฐ
        SELECT
            t1.ProductID as ProductID_A, t1.Description AS Description_A, t2.ProductID as ProductID_B, t2.Description AS Description_B, COUNT(DISTINCT CustomerID) as Num_of_Simultaneous_Purchaser
        FROM t_simultaneous_purchaser_pre as t1
        INNER JOIN t_simultaneous_purchaser_pre as t2
        USING(Purchase_date, CustomerID)
        WHERE t1.ProductID != t2.ProductID
        GROUP BY t1.ProductID, t1.Description, t2.ProductID, t2.Description
        )

        SELECT
        ProductID_A, Description_A,
        t_purchaser.Num_of_Purchaser AS Num_of_Purchaser_A,
        t_purchaser.Num_of_Purchaser / Num_of_All AS PurchaseRate_A,
        ProductID_B, Description_B,
        t2.Num_of_Purchaser AS Num_of_Purchaser_B,
        t2.Num_of_Purchaser / Num_of_All AS PurchaseRate_B,
        Num_of_Simultaneous_Purchaser,
        Num_of_Simultaneous_Purchaser / t_purchaser.Num_of_Purchaser AS CombinedSalesRate,
        (Num_of_Simultaneous_Purchaser / t_purchaser.Num_of_Purchaser) / (t2.Num_of_Purchaser / Num_of_All) AS Lift

        FROM t_purchaser
        LEFT OUTER JOIN t_simultaneous_purchaser
        ON t_purchaser.ProductID = t_simultaneous_purchaser.ProductID_A
        AND t_purchaser.Description = t_simultaneous_purchaser.Description_A

        LEFT OUTER JOIN t_purchaser as t2
        ON t_simultaneous_purchaser.ProductID_B = t2.ProductID
        AND t_simultaneous_purchaser.Description_B = t2.Description

        FULL OUTER JOIN t_all
        ON True

        -- ใƒ‡ใƒผใ‚ฟใŒๅคšใใชใ‚Šใ™ใŽใ‚‹ใฎใงใ€ไธŠไฝ10ๅ•†ๅ“ๅŒๅฃซใฎ็ต„ใฟๅˆใ‚ใ›ใซ้™ๅฎš
        WHERE
        Description_A in (SELECT Description FROM t_purchaser ORDER BY Num_of_Purchaser DESC LIMIT 10)
        AND
        Description_B in (SELECT Description FROM t_purchaser ORDER BY Num_of_Purchaser DESC LIMIT 10)
        """

    return sql


# ๅˆ†ๆžใƒกใƒ‹ใƒฅใƒผใ”ใจใฎใ‚ฐใƒฉใƒ•
def create_graph(analysis_menu, df):
    if analysis_menu == "ๆ™‚็ณปๅˆ—ๅˆ†ๆž":
        # ๆŠ˜ใ‚Œ็ทšใ‚ฐใƒฉใƒ•
        fig = px.line(df, x='YearMonthDate', y='Total_of_Amount')
    
    elif analysis_menu == "ใƒใ‚นใ‚ฑใƒƒใƒˆๅˆ†ๆž":
        # ใƒ’ใƒผใƒˆใƒžใƒƒใƒ—
        df = df.sort_values(["Description_A","Description_B"], ascending=[True, False]).reset_index()
        fig = go.Figure([go.Heatmap(z=df.CombinedSalesRate,
                                 x=df.Description_A.values,
                                 y=df.Description_B.values)])

    return fig