Rooobert's picture
Update app.py
943ec55 verified
import json
import os
import time
from urllib.parse import quote

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests
import streamlit as st
from pytrends.request import TrendReq
from tenacity import retry, wait_exponential, stop_after_attempt
# Page heading shown at the top of the app.
st.title("🐣MOMO 🆚 PCHOME 商品搜索和 Google Trends 分析👁️‍🗨️")

# Search keyword; pre-filled with a default value.
keyword = st.text_input(label="請輸入要搜索的關鍵字: ", value="筆電")

# Date range that is later handed to Google Trends.
start_date = st.date_input(
    label="請選擇開始日期",
    value=pd.Timestamp("2024-08-01"),
)
end_date = st.date_input(
    label="請選擇結束日期",
    value=pd.Timestamp("2024-08-11"),
)

# Number of result pages to search (integer between 1 and 100).
page_number = st.number_input(
    label="請輸入要搜索的頁數: ",
    min_value=1,
    max_value=100,
    value=1,
    step=1,
)

# Timeframe string built as "<start> <end>" from the two selected dates.
search_timeframe = "{} {}".format(start_date, end_date)
# Search handler: when the button is pressed, fetch listings from MOMO and
# PCHOME, show per-platform price statistics and charts, show the combined
# table, then plot Google Trends interest for the keyword.

# Seconds to wait on a shop API request before giving up.
_REQUEST_TIMEOUT = 10
# Scatter plots show at most this many products.
_MAX_SCATTER_POINTS = 70
# Bucket edges and labels used to group prices in the sunburst charts.
_PRICE_BINS = [0, 1000, 5000, 10000, 50000, float('inf')]
_PRICE_LABELS = ['0-1000', '1001-5000', '5001-10000', '10001-50000', '50000+']


def _parse_momo_price(raw_price):
    """Convert a MOMO ``goodsPrice`` value to a float.

    Everything from the first ``(`` onward, thousands separators and ``$``
    are removed before conversion. Returns 0.0 when what remains is not a
    number.
    """
    cleaned = str(raw_price).split('(')[0].replace(',', '').replace('$', '')
    try:
        return float(cleaned)
    except ValueError:
        return 0.0


def _fetch_momo_products(search_keyword, page):
    """Fetch a single page of MOMO search results.

    Args:
        search_keyword: Text to search for.
        page: Page number to request (only this one page is fetched).

    Returns:
        A DataFrame with ``title``, ``price`` and ``platform`` columns. It is
        empty (but keeps those columns) when the request fails or yields no
        products; problems are reported to the user via ``st.error`` /
        ``st.warning`` instead of raising.
    """
    columns = ['title', 'price', 'platform']
    momo_url = "https://apisearch.momoshop.com.tw/momoSearchCloud/moec/textSearch"
    momo_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36"
    }
    momo_payload = {
        "host": "momoshop",
        "flag": "searchEngine",
        "data": {
            "searchValue": search_keyword,
            "curPage": str(int(page)),
            "priceS": "0",
            "priceE": "9999999",
            "searchType": "1",
        },
    }

    try:
        momo_response = requests.post(
            momo_url,
            headers=momo_headers,
            json=momo_payload,
            timeout=_REQUEST_TIMEOUT,
        )
    except requests.RequestException as exc:
        st.error(f"MOMO 請求失敗: {exc}")
        return pd.DataFrame(columns=columns)

    if momo_response.status_code != 200:
        st.error(f"MOMO 請求失敗,狀態碼: {momo_response.status_code}")
        return pd.DataFrame(columns=columns)

    try:
        body = momo_response.json()
    except ValueError as exc:
        st.error(f"MOMO 回應解析失敗: {exc}")
        return pd.DataFrame(columns=columns)

    # Either level may be missing or null in the response; treat both as
    # "no products" rather than failing.
    search_data = body.get('rtnSearchData') if isinstance(body, dict) else None
    goods = (search_data or {}).get('goodsInfoList') or []
    if not goods:
        st.warning("MOMO 沒有找到相關商品")
        return pd.DataFrame(columns=columns)

    rows = [
        {
            'title': product.get('goodsName', ''),
            'price': _parse_momo_price(product.get('goodsPrice', '')),
            'platform': 'MOMO',
        }
        for product in goods
    ]
    return pd.DataFrame(rows, columns=columns)


def _fetch_pchome_products(search_keyword, pages):
    """Fetch pages ``1..pages`` of PCHOME search results.

    Args:
        search_keyword: Text to search for; it is percent-encoded before being
            placed in the query string.
        pages: Number of result pages to request.

    Returns:
        One DataFrame holding the rows of every page that was fetched
        successfully (columns limited to name / describe / price /
        original_price where present, plus ``platform``). Empty when nothing
        could be fetched. Failed pages are reported via ``st.error`` and
        skipped.
    """
    pchome_base_url = 'https://ecshweb.pchome.com.tw/search/v3.3/all/results?q='
    wanted_columns = ['name', 'describe', 'price', 'orig']
    # Encode once so characters such as '&', '#' or spaces cannot break the
    # query string.
    encoded_keyword = quote(str(search_keyword), safe='')

    page_frames = []
    for page in range(1, int(pages) + 1):
        if page > 1:
            # Pause one second between consecutive page requests.
            time.sleep(1)

        pchome_url = f'{pchome_base_url}{encoded_keyword}&page={page}&sort=sale/dc'
        try:
            pchome_response = requests.get(pchome_url, timeout=_REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            st.error(f"PCHOME 請求失敗: {exc}")
            continue

        if pchome_response.status_code != 200:
            st.error(f"PCHOME 請求失敗,狀態碼: {pchome_response.status_code}")
            continue

        try:
            payload = pchome_response.json()
        except ValueError as exc:
            st.error(f"PCHOME 回應解析失敗: {exc}")
            continue

        products = payload.get('prods') if isinstance(payload, dict) else None
        if not products:
            if page == 1:
                st.warning("PCHOME 沒有找到相關商品")
            # A page without products means there is nothing further to read.
            break

        page_df = pd.DataFrame(products)
        page_df = page_df[[col for col in wanted_columns if col in page_df.columns]]
        if 'price' not in page_df.columns:
            # Without prices the rows cannot be used by any of the reports.
            continue

        # rename() ignores the mapping when 'orig' is absent.
        page_df = page_df.rename(columns={'orig': 'original_price'})
        page_df['platform'] = 'PCHOME'
        page_df['price'] = pd.to_numeric(page_df['price'], errors='coerce').astype(float)
        page_frames.append(page_df)

    if not page_frames:
        return pd.DataFrame()
    return pd.concat(page_frames, ignore_index=True)


def _show_platform_report(products, name_column, platform, search_keyword):
    """Render the table, price statistics and charts for one platform.

    Args:
        products: DataFrame with a ``price`` column and a product-name column.
        name_column: Name of the column holding the product name.
        platform: Platform label used in every heading ("MOMO" / "PCHOME").
        search_keyword: Keyword shown in the chart titles.

    Does nothing when ``products`` is empty.
    """
    if products.empty:
        return

    st.write(f"{platform} 商品數據:", products)

    # Price statistics.
    avg_price = products['price'].mean()
    st.write(f"{platform} 平均價格: {avg_price:.2f}")
    st.write(f"{platform} 最高價格: {products['price'].max():.2f}")
    st.write(f"{platform} 最低價格: {products['price'].min():.2f}")

    # Scatter plot of the first products, with the average as a dashed line.
    shown = products[:_MAX_SCATTER_POINTS]
    fig = px.scatter(
        shown,
        x=name_column,
        y='price',
        hover_data=[name_column],
        title=f'{platform} 電商網站上 "{search_keyword}" 的銷售價格',
        labels={name_column: '商品名稱', 'price': '價格'},
    )
    fig.update_xaxes(
        tickangle=45,
        tickmode='array',
        tickvals=list(range(len(shown))),
        ticktext=shown[name_column].tolist(),
    )
    fig.add_hline(
        y=avg_price,
        line_dash="dash",
        line_color="red",
        annotation_text=f"參考價格: {avg_price:.2f}",
        annotation_position="bottom right",
    )
    fig.update_layout(height=600)
    st.plotly_chart(fig)

    # Sunburst of price buckets. Rows whose price is not above zero (for
    # example the 0 fallback for unparseable prices) fall outside every
    # bucket, leaving a NaN parent that the chart cannot place; with
    # values='price' they would contribute no area anyway, so they are
    # dropped first.
    priced = products[products['price'] > 0].copy()
    if priced.empty:
        return
    priced['price_range'] = pd.cut(priced['price'], bins=_PRICE_BINS, labels=_PRICE_LABELS)
    fig = px.sunburst(
        priced,
        path=['price_range', name_column],
        values='price',
        title=f'{platform} "{search_keyword}" 價格分佈 (Sunburst 圖)',
    )
    fig.update_layout(height=800)
    st.plotly_chart(fig)


# Retry with exponential backoff; reraise=True surfaces the last underlying
# error instead of tenacity's generic RetryError wrapper.
@retry(
    wait=wait_exponential(multiplier=1, min=4, max=60),
    stop=stop_after_attempt(5),
    reraise=True,
)
def fetch_trends_data(pytrend):
    """Return the interest-over-time frame for an already-built payload."""
    return pytrend.interest_over_time()


def _show_google_trends(search_keyword, timeframe):
    """Plot Google Trends interest for the keyword and show summary stats.

    Args:
        search_keyword: Keyword to look up.
        timeframe: Timeframe string passed through to ``build_payload``.

    Any failure while talking to Google Trends is shown with ``st.error``
    rather than raised.
    """
    st.subheader("Google趨勢分析")
    try:
        # Client construction and build_payload live inside the try as well,
        # so a failure there is reported the same way as a failed fetch.
        # NOTE(review): both presumably contact Google — confirm against pytrends.
        pytrend = TrendReq(hl="zh-TW", tz=-480)
        # NOTE(review): cat=3 limits results to one Google Trends category;
        # confirm this is intended (pytrends documents 0 as "no category").
        pytrend.build_payload(
            kw_list=[search_keyword],
            cat=3,
            timeframe=timeframe,
            geo="TW",
            gprop="",
        )
        trends_df = fetch_trends_data(pytrend)
        if trends_df.empty:
            st.warning("Google趨勢沒有返回數據")
            return
        trends_df = trends_df.drop(columns=["isPartial"], errors="ignore")

        # Trend line chart.
        fig = px.line(
            trends_df,
            x=trends_df.index,
            y=search_keyword,
            title=f"Google趨勢 - '{search_keyword}' 的趨勢分析",
        )
        fig.update_traces(mode='lines+markers')
        fig.update_layout(xaxis_title="時間", yaxis_title="興趣指數", height=600)
        st.plotly_chart(fig)

        # Summary statistics of the trend data.
        st.write("趨勢數據統計:")
        st.write(trends_df.describe())
    except Exception as e:
        st.error(f"獲取Google趨勢數據時出錯: {e}")


# Create a button to start the scraping process
if st.button("開始搜索"):
    start_time = time.time()

    # MOMO: only the requested page number is fetched.
    momo_df = _fetch_momo_products(keyword, page_number)
    _show_platform_report(momo_df, 'title', 'MOMO', keyword)

    # PCHOME: pages 1..page_number are fetched.
    pchome_data = _fetch_pchome_products(keyword, page_number)
    _show_platform_report(pchome_data, 'name', 'PCHOME', keyword)

    # Combine whichever platforms returned data. MOMO names are in 'title'
    # and PCHOME names in 'name', so each row fills only one of the two.
    available_frames = [frame for frame in (momo_df, pchome_data) if not frame.empty]
    if available_frames:
        combined_data = pd.concat(available_frames, ignore_index=True)
        st.write("合併的商品數據:", combined_data)
        combined_avg_price = combined_data['price'].mean()
        st.write(f"合併後的平均價格: {combined_avg_price:.2f}")

    _show_google_trends(keyword, search_timeframe)

    end_time = time.time()
    st.write(f"執行時間: {end_time - start_time:.2f} 秒")