"""Streamlit app: MOMO vs PCHOME product search, price analysis and Google Trends.

The script renders, top to bottom:

1. keyword / page-count / date-range inputs,
2. a Google Trends chart for the chosen date range,
3. on button press, scraped MOMO and PCHOME listings with price statistics,
   a scatter plot and a sunburst chart per platform, plus a combined CSV
   download,
4. a Google Trends chart for a fixed reference window.
"""

import json
import os
import time
from datetime import datetime, timedelta
from urllib.parse import quote

import matplotlib as mpl
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import requests
import streamlit as st
from pytrends.request import TrendReq

# Seconds to wait on any outbound HTTP call before giving up, so a stalled
# remote host cannot hang the Streamlit script run forever.
REQUEST_TIMEOUT = 15

FONT_URL = "https://drive.google.com/uc?id=1eGAsTN1HBpJAkeVM57_C7ccp7hbgSz3_&export=download"
FONT_PATH = "TaipeiSansTCBeta-Regular.ttf"
FONT_FAMILY = 'Taipei Sans TC Beta'

MOMO_URL = "https://apisearch.momoshop.com.tw/momoSearchCloud/moec/textSearch"
MOMO_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36"
}
MOMO_COLUMNS = ['title', 'price', 'platform']

PCHOME_BASE_URL = 'https://ecshweb.pchome.com.tw/search/v3.3/all/results?q='
PCHOME_COLUMNS = ['name', 'describe', 'price', 'orig']

TREND_COLOR = '#4285F4'
# Fixed window used by the trailing trends chart of the original script.
LEGACY_TREND_TIMEFRAME = "2024-06-18 2024-06-24"
# Only the first N products are drawn on the price scatter plots.
MAX_PLOTTED_PRODUCTS = 70


def ensure_cjk_font():
    """Register the Taipei Sans TC font so Chinese chart labels render.

    The font file is downloaded once and reused from disk afterwards. Any
    failure is reported as a warning and otherwise ignored: the font is
    cosmetic and must not take the whole app down.
    """
    # rcParams live for the whole process, so a previous script run may
    # already have configured the font.
    if FONT_FAMILY in mpl.rcParams['font.family']:
        return

    if not os.path.exists(FONT_PATH):
        try:
            response = requests.get(FONT_URL, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException as exc:
            st.warning(f"無法下載中文字型,圖表中的中文可能無法正確顯示: {exc}")
            return
        with open(FONT_PATH, "wb") as font_file:
            font_file.write(response.content)

    try:
        fm.fontManager.addfont(FONT_PATH)
    except (OSError, RuntimeError) as exc:
        # The download was not a usable font file (e.g. an HTML error page).
        # Remove it so the next run retries instead of failing the same way.
        if os.path.exists(FONT_PATH):
            os.remove(FONT_PATH)
        st.warning(f"無法載入中文字型,圖表中的中文可能無法正確顯示: {exc}")
        return

    mpl.rc('font', family=FONT_FAMILY)


def render_trend_chart(keyword, timeframe):
    """Fetch Google Trends interest for ``keyword`` and render chart + stats.

    Args:
        keyword: Search term to query.
        timeframe: Pytrends timeframe string, ``"YYYY-MM-DD YYYY-MM-DD"``.
    """
    pytrend = TrendReq(hl="zh-TW", tz=-480)
    # kw_list must be a list: a bare string is iterated character by
    # character, which queries (and plots) single characters instead of the
    # keyword.
    # NOTE(review): cat=3 limits results to one Google Trends category --
    # confirm it is the intended one for product searches.
    pytrend.build_payload(
        kw_list=[keyword], cat=3, timeframe=timeframe, geo="TW", gprop="")
    trend_df = pytrend.interest_over_time()

    if trend_df.empty or keyword not in trend_df.columns:
        st.write("在選定的時間範圍內沒有數據。請嘗試不同的日期範圍或關鍵字。")
        return

    if "isPartial" in trend_df.columns:
        trend_df = trend_df.drop(columns=["isPartial"])

    fig, ax = plt.subplots(figsize=(12, 8), dpi=80)
    ax.plot(trend_df.index, trend_df[keyword], label=keyword, lw=3.0,
            marker='o', markersize=8, color=TREND_COLOR, linestyle='-')
    ax.set_title(f"Interest Over Time for {keyword}", fontsize=20,
                 fontweight='bold', color=TREND_COLOR)
    ax.set_xlabel("時間", fontsize=14, fontweight='bold', color=TREND_COLOR)
    ax.set_ylabel("熱搜度", fontsize=14, fontweight='bold', color=TREND_COLOR)
    ax.grid(True, linestyle='--', alpha=0.6)

    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.tick_params(axis='x', colors=TREND_COLOR)
    ax.tick_params(axis='y', colors=TREND_COLOR)

    legend = ax.legend()
    legend.get_frame().set_alpha(0.5)
    legend.get_lines()[0].set_linestyle('-')

    fig.tight_layout()
    st.pyplot(fig)
    # Streamlit reruns the script on every interaction; close the figure so
    # open figures do not pile up in the long-lived process.
    plt.close(fig)

    st.write(trend_df.describe())


def parse_momo_price(raw_price):
    """Convert a MOMO price value such as ``"$1,299(...)"`` to a float.

    Returns ``0.0`` when the value cannot be parsed, as the original script
    did.
    """
    cleaned = str(raw_price).split('(')[0].replace(',', '').replace('$', '')
    try:
        return float(cleaned)
    except ValueError:
        return 0.0


def fetch_momo_products(keyword, page):
    """Query the MOMO search API and return the products as a DataFrame.

    Args:
        keyword: Search term.
        page: Result page to request.

    Returns:
        DataFrame with columns ``title``, ``price``, ``platform``; empty when
        the request fails or nothing is found (errors are shown in the UI).
    """
    no_products = pd.DataFrame(columns=MOMO_COLUMNS)
    # NOTE(review): only the single page ``page`` is requested here, whereas
    # the PCHOME scraper walks pages 1..page -- confirm which is intended.
    payload = {
        "host": "momoshop",
        "flag": "searchEngine",
        "data": {
            "searchValue": keyword,
            "curPage": str(page),
            "priceS": "0",
            "priceE": "9999999",
            "searchType": "1",
        },
    }

    try:
        response = requests.post(MOMO_URL, headers=MOMO_HEADERS, json=payload,
                                 timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        st.error(f"MOMO 請求失敗: {exc}")
        return no_products

    if response.status_code != 200:
        st.error(f"MOMO 請求失敗,狀態碼: {response.status_code}")
        return no_products

    try:
        body = response.json()
    except ValueError:
        st.error("MOMO 回應不是有效的 JSON。")
        return no_products

    # Either level may be missing or null in the response.
    search_data = (body.get('rtnSearchData') if isinstance(body, dict) else None) or {}
    goods = search_data.get('goodsInfoList') or []

    rows = [
        {
            'title': item.get('goodsName', ''),
            'price': parse_momo_price(item.get('goodsPrice', '')),
            'platform': 'MOMO',
        }
        for item in goods
    ]
    return pd.DataFrame(rows, columns=MOMO_COLUMNS)


def fetch_pchome_products(keyword, pages):
    """Query PCHOME search pages ``1..pages`` and return the products.

    Args:
        keyword: Search term; percent-encoded before being put in the URL.
        pages: Number of result pages to fetch.

    Returns:
        DataFrame holding whichever of ``name``, ``describe``, ``price`` and
        ``original_price`` the API returned, plus ``platform``. Empty when
        nothing could be fetched (errors are shown in the UI).
    """
    # Encode so characters such as '&', '#' or spaces cannot corrupt the
    # query string.
    encoded_keyword = quote(keyword, safe='')
    frames = []

    for page in range(1, pages + 1):
        url = f'{PCHOME_BASE_URL}{encoded_keyword}&page={page}&sort=sale/dc'
        try:
            response = requests.get(url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            st.error(f"PCHOME 請求失敗: {exc}")
            continue

        if response.status_code != 200:
            st.error(f"PCHOME 請求失敗,狀態碼: {response.status_code}")
            continue

        try:
            body = json.loads(response.content)
        except ValueError:
            st.error("PCHOME 回應不是有效的 JSON。")
            continue

        products = body.get('prods') if isinstance(body, dict) else None
        if products:
            page_df = pd.DataFrame(products)
            kept = [col for col in PCHOME_COLUMNS if col in page_df.columns]
            page_df = page_df[kept].rename(columns={'orig': 'original_price'})
            page_df['platform'] = 'PCHOME'
            frames.append(page_df)

        # Pause between successful page requests, as the original did.
        time.sleep(1)

    if not frames:
        return pd.DataFrame()
    # ignore_index gives every product a unique row index; otherwise each
    # page restarts at 0 and the scatter plot stacks pages on top of each
    # other.
    return pd.concat(frames, ignore_index=True)


def render_price_analysis(products, platform, label_column, keyword):
    """Show the table, price statistics, scatter plot and sunburst chart.

    Args:
        products: Product DataFrame for one platform.
        platform: Platform name used in headings (``"MOMO"`` / ``"PCHOME"``).
        label_column: Column holding the product name for the sunburst chart.
        keyword: Search term shown in the chart title.
    """
    required = {'price', label_column}
    if products.empty or not required.issubset(products.columns):
        st.info(f"{platform} 沒有可顯示的商品數據。")
        return

    st.write(f"{platform} 商品數據:", products)

    prices = products['price']
    average_price = prices.mean()
    st.write(f"{platform} 平均價格: {average_price:.2f}")
    st.write(f"{platform} 最高價格: {prices.max():.2f}")
    st.write(f"{platform} 最低價格: {prices.min():.2f}")

    fig, ax = plt.subplots(figsize=(15, 8))
    ax.plot(products.index[:MAX_PLOTTED_PRODUCTS], prices[:MAX_PLOTTED_PRODUCTS],
            'o', color='skyblue', markersize=8)
    ax.set_title(f'{platform} 電商網站上 "{keyword}" 的銷售價格',
                 fontsize=20, fontweight='bold')
    ax.axhline(y=average_price, color='red', linestyle='--', linewidth=2,
               label=f'參考價格: {average_price:.2f}')
    ax.set_xlabel('商品索引', fontsize=14)
    ax.set_ylabel('價格', fontsize=14)
    ax.tick_params(axis='x', rotation=45, labelsize=12)
    ax.tick_params(axis='y', labelsize=12)
    ax.legend(fontsize=12, loc='upper left')
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    fig.tight_layout()
    st.pyplot(fig)
    plt.close(fig)

    # Sunburst chart: one ring for product names, one for their prices.
    sunburst_data = (
        products.groupby([label_column, 'price']).size().reset_index(name='count')
    )
    sunburst_fig = px.sunburst(
        sunburst_data,
        path=[label_column, 'price'],
        values='count',
        title=f'{platform} 商品價格分佈',
    )
    sunburst_fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))
    st.plotly_chart(sunburst_fig)


# ---------------------------------------------------------------------------
# Page layout
# ---------------------------------------------------------------------------

st.title("🐣MOMO 🆚 PCHOME 商品搜索和價格分析👁️‍🗨️")

# Configure the font before the first chart is drawn; every chart on the page
# carries Chinese labels.
ensure_cjk_font()

# User input
search_keyword = st.text_input("請輸入要搜索的關鍵字: ", "筆電").strip()
page_number = int(st.number_input(
    "請輸入要搜索的頁數: ", min_value=1, max_value=100, value=1, step=1))

# Date range for the Google Trends analysis
st.subheader("Google Trends 分析時間範圍")
default_end_date = datetime.now().date()
default_start_date = default_end_date - timedelta(days=7)
start_date = st.date_input("開始日期", value=default_start_date)
end_date = st.date_input("結束日期", value=default_end_date)

if not search_keyword:
    # Neither Google Trends nor the shop APIs can be queried without a term.
    st.warning("請輸入要搜索的關鍵字。")
    st.stop()

if start_date <= end_date:
    render_trend_chart(search_keyword, f"{start_date} {end_date}")
else:
    st.error("錯誤:結束日期必須在開始日期之後。")

# Scrape both shops when the user asks for it
if st.button("開始搜索"):
    start_time = time.time()

    momo_df = fetch_momo_products(search_keyword, page_number)
    render_price_analysis(momo_df, 'MOMO', 'title', search_keyword)

    pchome_data = fetch_pchome_products(search_keyword, page_number)
    render_price_analysis(pchome_data, 'PCHOME', 'name', search_keyword)

    # Combine whatever was fetched; either platform may have returned nothing.
    non_empty = [frame for frame in (momo_df, pchome_data) if not frame.empty]
    combined_data = (
        pd.concat(non_empty, ignore_index=True) if non_empty else pd.DataFrame()
    )

    # utf-8-sig adds a BOM so spreadsheet software detects the encoding.
    csv_bytes = combined_data.to_csv(index=False).encode('utf-8-sig')
    st.download_button(
        label="下載CSV檔案",
        data=csv_bytes,
        file_name=f'{search_keyword}_combined_data.csv',
        mime='text/csv',
    )

    end_time = time.time()
    st.write(f"Execution time: {end_time - start_time:.2f} seconds")

# Trends chart for the fixed reference window.
# NOTE(review): this duplicates the date-range chart above with a hard-coded
# window, and the collapsed source does not show whether it belonged inside
# the button branch -- confirm placement and whether it is still wanted.
render_trend_chart(search_keyword, LEGACY_TREND_TIMEFRAME)