import plotly.graph_objects as go import plotly.express as px # 分析メニュー analysis_menu_list = ["時系列分析","バスケット分析"] # 分析メニューごとのSQL def create_sql(analysis_menu, country, start_date, end_date): if analysis_menu == "時系列分析": sql = f""" SELECT CAST(InvoiceDate AS DATE) AS YearMonthDate, COUNT(DISTINCT CustomerID) AS Num_of_Purchaser, SUM(Quantity) AS Total_of_Amount, SUM(UnitPrice * Quantity) AS SalesTotal FROM df WHERE CAST(InvoiceDate AS DATE) BETWEEN DATETIME '{start_date}' AND DATETIME '{end_date}' AND {country} GROUP BY YearMonthDate ORDER BY YearMonthDate """ elif analysis_menu == "バスケット分析": sql = f""" WITH t_all AS( -- 総来店者数 SELECT COUNT(DISTINCT CustomerID) AS Num_of_All FROM df WHERE CAST(InvoiceDate AS DATE) BETWEEN DATETIME '{start_date}' AND DATETIME '{end_date}' AND {country} ), t_purchaser AS( -- 商品ごとの購入者数 SELECT CAST(StockCode AS STRING) AS ProductID, Description, COUNT(DISTINCT CustomerID) AS Num_of_Purchaser FROM df WHERE CAST(InvoiceDate AS DATE) BETWEEN DATETIME '{start_date}' AND DATETIME '{end_date}' AND {country} GROUP BY ProductID, Description ), t_simultaneous_purchaser_pre AS( -- 商品ごとの同時購入者 SELECT DISTINCT CAST(StockCode AS STRING) AS ProductID, Description, CAST(InvoiceDate AS DATE) Purchase_date, CustomerID FROM df WHERE CAST(InvoiceDate AS DATE) BETWEEN DATETIME '{start_date}' AND DATETIME '{end_date}' AND {country} ), t_simultaneous_purchaser AS( -- 商品ごとの同時購入者数 SELECT t1.ProductID as ProductID_A, t1.Description AS Description_A, t2.ProductID as ProductID_B, t2.Description AS Description_B, COUNT(DISTINCT CustomerID) as Num_of_Simultaneous_Purchaser FROM t_simultaneous_purchaser_pre as t1 INNER JOIN t_simultaneous_purchaser_pre as t2 USING(Purchase_date, CustomerID) WHERE t1.ProductID != t2.ProductID GROUP BY t1.ProductID, t1.Description, t2.ProductID, t2.Description ) SELECT ProductID_A, Description_A, t_purchaser.Num_of_Purchaser AS Num_of_Purchaser_A, t_purchaser.Num_of_Purchaser / Num_of_All AS PurchaseRate_A, ProductID_B, Description_B, t2.Num_of_Purchaser AS Num_of_Purchaser_B, t2.Num_of_Purchaser / Num_of_All AS PurchaseRate_B, Num_of_Simultaneous_Purchaser, Num_of_Simultaneous_Purchaser / t_purchaser.Num_of_Purchaser AS CombinedSalesRate, (Num_of_Simultaneous_Purchaser / t_purchaser.Num_of_Purchaser) / (t2.Num_of_Purchaser / Num_of_All) AS Lift FROM t_purchaser LEFT OUTER JOIN t_simultaneous_purchaser ON t_purchaser.ProductID = t_simultaneous_purchaser.ProductID_A AND t_purchaser.Description = t_simultaneous_purchaser.Description_A LEFT OUTER JOIN t_purchaser as t2 ON t_simultaneous_purchaser.ProductID_B = t2.ProductID AND t_simultaneous_purchaser.Description_B = t2.Description FULL OUTER JOIN t_all ON True -- データが多くなりすぎるので、上位10商品同士の組み合わせに限定 WHERE Description_A in (SELECT Description FROM t_purchaser ORDER BY Num_of_Purchaser DESC LIMIT 10) AND Description_B in (SELECT Description FROM t_purchaser ORDER BY Num_of_Purchaser DESC LIMIT 10) """ return sql # 分析メニューごとのグラフ def create_graph(analysis_menu, df): if analysis_menu == "時系列分析": # 折れ線グラフ fig = px.line(df, x='YearMonthDate', y='Total_of_Amount') elif analysis_menu == "バスケット分析": # ヒートマップ df = df.sort_values(["Description_A","Description_B"], ascending=[True, False]).reset_index() fig = go.Figure([go.Heatmap(z=df.CombinedSalesRate, x=df.Description_A.values, y=df.Description_B.values)]) return fig