Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import requests
|
3 |
+
import pandas as pd
|
4 |
+
import matplotlib as mpl
|
5 |
+
import matplotlib.font_manager as fm
|
6 |
+
import matplotlib.pyplot as plt
|
7 |
+
import streamlit as st
|
8 |
+
import json
|
9 |
+
import time
|
10 |
+
|
11 |
+
# Streamlit page: search MOMO and PCHOME for a keyword, show per-platform
# price statistics and charts, and offer the combined result as a CSV download.
st.title("MOMO & PCHOME 商品搜索和價格分析")

# User input: the search keyword and a page count/number (1-100).
search_keyword = st.text_input("請輸入要搜索的關鍵字: ", "筆電")
page_number = st.number_input("請輸入要搜索的頁數: ", min_value=1, max_value=100, value=1, step=1)

# Upper bound (seconds) for every outbound HTTP request so a stalled server
# cannot hang the Streamlit run indefinitely.
_REQUEST_TIMEOUT = 15

# CJK-capable font used for the chart labels, cached next to the script.
_FONT_PATH = "TaipeiSansTCBeta-Regular.ttf"
_FONT_FAMILY = "Taipei Sans TC Beta"
_FONT_URL = "https://drive.google.com/uc?id=1eGAsTN1HBpJAkeVM57_C7ccp7hbgSz3_&export=download"

# Only the first N products are drawn on each chart.
_MAX_PLOTTED = 70


def _ensure_cjk_font():
    """Register the chart font with matplotlib, downloading it only if needed.

    Best effort: on any failure a warning is shown and charts fall back to
    matplotlib's default font instead of aborting the whole search.
    """
    try:
        already_registered = any(
            entry.name == _FONT_FAMILY for entry in fm.fontManager.ttflist
        )
        if not already_registered:
            if not os.path.exists(_FONT_PATH):
                font_response = requests.get(_FONT_URL, timeout=_REQUEST_TIMEOUT)
                font_response.raise_for_status()
                with open(_FONT_PATH, "wb") as font_file:
                    font_file.write(font_response.content)
            fm.fontManager.addfont(_FONT_PATH)
        mpl.rc('font', family=_FONT_FAMILY)
    except (requests.RequestException, OSError, RuntimeError, ValueError) as exc:
        # A failed or non-font download would otherwise be reused forever;
        # drop it so the next run retries the download.
        try:
            if os.path.exists(_FONT_PATH):
                os.remove(_FONT_PATH)
        except OSError:
            pass
        st.warning(f"無法載入中文字型,圖表文字可能無法正確顯示: {exc}")


def _parse_momo_price(raw_price):
    """Turn a MOMO price field into a float.

    Anything from the first '(' onward is dropped, then ',' and '$' are
    removed. Values that still cannot be parsed become 0.
    NOTE(review): a 0 placeholder pulls the average/minimum down; consider
    NaN if unparseable prices turn out to be common.
    """
    cleaned = str(raw_price).split('(')[0].replace(',', '').replace('$', '')
    try:
        return float(cleaned)
    except ValueError:
        return 0


def _fetch_momo(keyword, page):
    """Query one MOMO result page and return a DataFrame of title/price/platform.

    Returns an empty DataFrame (after reporting the problem in the UI) when
    the request fails, the status is not 200, or the body is not usable JSON.
    """
    momo_url = "https://apisearch.momoshop.com.tw/momoSearchCloud/moec/textSearch"
    momo_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36"
    }
    momo_payload = {
        "host": "momoshop",
        "flag": "searchEngine",
        "data": {
            "searchValue": keyword,
            "curPage": str(page),
            "priceS": "0",
            "priceE": "9999999",
            "searchType": "1",
        },
    }

    try:
        momo_response = requests.post(
            momo_url, headers=momo_headers, json=momo_payload, timeout=_REQUEST_TIMEOUT
        )
    except requests.RequestException as exc:
        st.error(f"MOMO 請求失敗: {exc}")
        return pd.DataFrame()

    if momo_response.status_code != 200:
        st.error(f"MOMO 請求失敗,狀態碼: {momo_response.status_code}")
        return pd.DataFrame()

    try:
        body = momo_response.json()
    except ValueError:
        st.error("MOMO 回應不是有效的 JSON")
        return pd.DataFrame()
    if not isinstance(body, dict):
        body = {}

    # `or` guards against keys that are present but null in the response.
    goods = (body.get('rtnSearchData') or {}).get('goodsInfoList') or []
    rows = [
        {
            'title': product.get('goodsName', ''),
            'price': _parse_momo_price(product.get('goodsPrice', '')),
            'platform': 'MOMO',
        }
        for product in goods
    ]
    return pd.DataFrame(rows)


def _fetch_pchome(keyword, pages):
    """Fetch PCHOME result pages 1..pages and return them as one DataFrame.

    Failed pages are reported in the UI and skipped. The returned frame has a
    fresh 0..n-1 index so rows from different pages do not share index labels.
    """
    pchome_url = 'https://ecshweb.pchome.com.tw/search/v3.3/all/results'
    frames = []

    for i in range(1, pages + 1):
        if i > 1:
            time.sleep(1)  # pause between consecutive page requests

        try:
            # `params` lets requests percent-encode the keyword.
            pchome_response = requests.get(
                pchome_url,
                params={'q': keyword, 'page': i, 'sort': 'sale/dc'},
                timeout=_REQUEST_TIMEOUT,
            )
        except requests.RequestException as exc:
            st.error(f"PCHOME 請求失敗: {exc}")
            continue

        if pchome_response.status_code != 200:
            st.error(f"PCHOME 請求失敗,狀態碼: {pchome_response.status_code}")
            continue

        try:
            pchome_json_data = pchome_response.json()
        except ValueError:
            st.error(f"PCHOME 第 {i} 頁回應不是有效的 JSON")
            continue

        prods = pchome_json_data.get('prods') if isinstance(pchome_json_data, dict) else None
        if not prods:
            # Assumes an empty page means there are no further results.
            break

        pchome_df = pd.DataFrame(prods)
        pchome_df['platform'] = 'PCHOME'  # Add platform identifier
        frames.append(pchome_df)

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


def _show_momo_chart(momo_df, momo_avg_price, keyword):
    """Draw the MOMO price line chart with the average as a reference line."""
    fig, ax = plt.subplots(figsize=(30, 15))
    momo_df['price'][:_MAX_PLOTTED].plot(
        ax=ax, marker='o', linestyle='-', color='skyblue', linewidth=2, markersize=8
    )
    ax.set_title(f'MOMO 電商網站上 "{keyword}" 的銷售價格', fontsize=30, fontweight='bold', color='navy')
    ax.axhline(y=momo_avg_price, color='red', linestyle='--', linewidth=2, label=f'參考價格: {momo_avg_price:.2f}')
    ax.set_xlabel('商品索引', fontsize=20, color='gray')
    ax.set_ylabel('價格', fontsize=20, color='gray')
    ax.legend(fontsize=12, loc='upper left')
    ax.grid(axis='y', linestyle='--', alpha=0.5)
    # plt.xticks/yticks act on the current axes, which is `ax` here.
    plt.xticks(rotation=45, ha='right', fontsize=12, color='gray')
    plt.yticks(fontsize=12, color='gray')
    ax.set_facecolor('#f8f8f8')
    fig.tight_layout()
    st.pyplot(fig)
    plt.close(fig)  # release the figure; pyplot keeps it alive otherwise


def _show_pchome_chart(pchome_data, pchome_avg_price, keyword):
    """Draw the PCHOME price scatter chart with the average as a reference line."""
    fig, ax = plt.subplots(figsize=(15, 8))
    ax.plot(
        pchome_data.index[:_MAX_PLOTTED],
        pchome_data['price'][:_MAX_PLOTTED],
        'o',
        color='skyblue',
        markersize=8,
    )
    ax.set_title(f'PCHOME 電商網站上 "{keyword}" 的銷售價格', fontsize=20, fontweight='bold')
    ax.axhline(y=pchome_avg_price, color='red', linestyle='--', linewidth=2, label=f'參考價格: {pchome_avg_price:.2f}')
    ax.set_xlabel('商品索引', fontsize=14)
    ax.set_ylabel('價格', fontsize=14)
    ax.tick_params(axis='x', rotation=45, labelsize=12)
    ax.tick_params(axis='y', labelsize=12)
    ax.legend(fontsize=12, loc='upper left')
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    fig.tight_layout()
    st.pyplot(fig)
    plt.close(fig)  # release the figure; pyplot keeps it alive otherwise


# Run the whole search/analysis pipeline when the button is pressed.
if st.button("開始搜索"):
    start_time = time.time()
    pages = int(page_number)

    # Register the font before either chart so both can render CJK labels,
    # regardless of which platform's request succeeds.
    _ensure_cjk_font()

    # MOMO: a single result page (the one selected by the user).
    momo_df = _fetch_momo(search_keyword, pages)
    if not momo_df.empty:
        st.write("MOMO 商品數據:", momo_df)

        # MOMO data analysis
        momo_avg_price = momo_df['price'].mean()
        st.write(f"MOMO 平均價格: {momo_avg_price:.2f}")
        st.write(f"MOMO 最高價格: {momo_df['price'].max():.2f}")
        st.write(f"MOMO 最低價格: {momo_df['price'].min():.2f}")

        # MOMO visualization
        _show_momo_chart(momo_df, momo_avg_price, search_keyword)

    # PCHOME: pages 1 through the selected number.
    pchome_data = _fetch_pchome(search_keyword, pages)
    if not pchome_data.empty:
        st.write("PCHOME 商品數據:", pchome_data)

        if 'price' in pchome_data.columns:
            # PCHOME data analysis
            pchome_avg_price = pchome_data['price'].mean()
            st.write(f"PCHOME 平均價格: {pchome_avg_price:.2f}")
            st.write(f"PCHOME 最高價格: {pchome_data['price'].max():.2f}")
            st.write(f"PCHOME 最低價格: {pchome_data['price'].min():.2f}")

            # PCHOME visualization
            _show_pchome_chart(pchome_data, pchome_avg_price, search_keyword)

    # Combine MOMO and PCHOME data; either side may be empty if its request
    # failed or returned nothing, so only non-empty frames are concatenated.
    available = [frame for frame in (momo_df, pchome_data) if not frame.empty]
    if available:
        combined_data = pd.concat(available, ignore_index=True)

        # Offer the combined data as a CSV download.
        csv = combined_data.to_csv(index=False).encode('utf-8')
        st.download_button(
            label="下載CSV檔案",
            data=csv,
            file_name=f'{search_keyword}_combined_data.csv',
            mime='text/csv',
        )
    else:
        st.warning("沒有找到任何商品數據")

    end_time = time.time()
    st.write(f"Execution time: {end_time - start_time:.2f} seconds")
|