Roberta2024 committed on
Commit
20ea5d1
·
verified ·
1 Parent(s): 4d22569

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +138 -0
app.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import pandas as pd
4
+ import matplotlib as mpl
5
+ import matplotlib.font_manager as fm
6
+ import matplotlib.pyplot as plt
7
+ import streamlit as st
8
+ import json
9
+ import time
10
+
# Streamlit app: search MOMO and PCHOME for a keyword, show per-platform price
# statistics and charts, and offer the combined results as a CSV download.

REQUEST_TIMEOUT = 15  # seconds; keeps a stalled endpoint from hanging the app
MAX_PLOT_POINTS = 70  # only the first N products are drawn on each chart

MOMO_URL = "https://apisearch.momoshop.com.tw/momoSearchCloud/moec/textSearch"
PCHOME_URL = "https://ecshweb.pchome.com.tw/search/v3.3/all/results"
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36"
)

# CJK-capable font so the Chinese chart labels render.
FONT_URL = "https://drive.google.com/uc?id=1eGAsTN1HBpJAkeVM57_C7ccp7hbgSz3_&export=download"
FONT_PATH = "TaipeiSansTCBeta-Regular.ttf"
FONT_FAMILY = "Taipei Sans TC Beta"

MOMO_COLUMNS = ["title", "price", "platform"]


def parse_price(raw) -> float:
    """Convert a MOMO price value such as ``"$1,299(sale)"`` to a float.

    Anything from the first ``(`` onward is dropped, then thousands
    separators and the ``$`` sign are removed. Values that still cannot be
    parsed yield ``0.0``.
    """
    text = str(raw).split("(")[0].replace(",", "").replace("$", "")
    try:
        return float(text)
    except ValueError:
        return 0.0


def ensure_font() -> None:
    """Register the CJK chart font, downloading it only when it is missing.

    Failures are reported as a warning instead of aborting the search; the
    charts are then drawn with matplotlib's current font settings.
    """
    already_registered = any(
        entry.name == FONT_FAMILY for entry in fm.fontManager.ttflist
    )
    if not already_registered:
        if not os.path.exists(FONT_PATH):
            try:
                response = requests.get(FONT_URL, timeout=REQUEST_TIMEOUT)
                response.raise_for_status()
            except requests.RequestException as exc:
                st.warning(f"字型下載失敗,圖表中的中文可能無法正常顯示: {exc}")
                return
            with open(FONT_PATH, "wb") as font_file:
                font_file.write(response.content)
        try:
            fm.fontManager.addfont(FONT_PATH)
        except (OSError, RuntimeError) as exc:
            # The saved file is not a usable font (for example an HTML error
            # page was returned); delete it so the next run downloads again.
            if os.path.exists(FONT_PATH):
                os.remove(FONT_PATH)
            st.warning(f"字型載入失敗,圖表中的中文可能無法正常顯示: {exc}")
            return
    mpl.rc("font", family=FONT_FAMILY)


def fetch_momo(keyword: str, page: int) -> pd.DataFrame:
    """Fetch one MOMO result page as a frame with title/price/platform columns.

    Network errors, non-200 responses and non-JSON bodies are reported with
    ``st.error`` and produce an empty frame that still has those columns.
    """
    # NOTE(review): MOMO is queried for the single page `page`, while PCHOME
    # is queried for pages 1..page — confirm which of the two is intended.
    empty = pd.DataFrame(columns=MOMO_COLUMNS)
    payload = {
        "host": "momoshop",
        "flag": "searchEngine",
        "data": {
            "searchValue": keyword,
            "curPage": str(page),
            "priceS": "0",
            "priceE": "9999999",
            "searchType": "1",
        },
    }
    try:
        response = requests.post(
            MOMO_URL,
            headers={"User-Agent": USER_AGENT},
            json=payload,
            timeout=REQUEST_TIMEOUT,
        )
    except requests.RequestException as exc:
        st.error(f"MOMO 請求失敗: {exc}")
        return empty
    if response.status_code != 200:
        st.error(f"MOMO 請求失敗,狀態碼: {response.status_code}")
        return empty
    try:
        body = response.json()
    except ValueError:
        st.error("MOMO 回應不是有效的 JSON")
        return empty

    # `or` fallbacks also cover keys that are present but null.
    goods = (body.get("rtnSearchData") or {}).get("goodsInfoList") or []
    rows = [
        {
            "title": item.get("goodsName", ""),
            "price": parse_price(item.get("goodsPrice", "")),
            "platform": "MOMO",
        }
        for item in goods
    ]
    return pd.DataFrame(rows, columns=MOMO_COLUMNS)


def fetch_pchome(keyword: str, pages: int) -> pd.DataFrame:
    """Fetch PCHOME result pages 1..pages and return them as one frame.

    A failed page is reported with ``st.error`` and skipped. The returned
    frame has a fresh 0..n-1 index and a ``platform`` column; it is empty
    when nothing could be fetched.
    """
    frames = []
    for page in range(1, pages + 1):
        if page > 1:
            time.sleep(1)  # pause between requests; none needed after the last
        try:
            # `params` lets requests URL-encode the keyword, so characters
            # such as '&' or '#' cannot corrupt the query string.
            response = requests.get(
                PCHOME_URL,
                params={"q": keyword, "page": page, "sort": "sale/dc"},
                timeout=REQUEST_TIMEOUT,
            )
        except requests.RequestException as exc:
            st.error(f"PCHOME 請求失敗: {exc}")
            continue
        if response.status_code != 200:
            st.error(f"PCHOME 請求失敗,狀態碼: {response.status_code}")
            continue
        try:
            products = response.json().get("prods") or []
        except ValueError:
            st.error("PCHOME 回應不是有效的 JSON")
            continue
        if not products:
            # Presumably past the last result page, so stop requesting more.
            break
        frames.append(pd.DataFrame(products))

    if not frames:
        return pd.DataFrame()
    combined = pd.concat(frames, ignore_index=True)
    combined["platform"] = "PCHOME"  # platform identifier for the merged CSV
    return combined


def plot_prices(prices: pd.Series, platform: str, keyword: str, average: float) -> None:
    """Draw the first MAX_PLOT_POINTS prices with the average as a dashed line."""
    shown = prices.iloc[:MAX_PLOT_POINTS].reset_index(drop=True)

    fig, ax = plt.subplots(figsize=(15, 8))
    ax.plot(
        shown.index,
        shown.values,
        marker="o",
        linestyle="-",
        color="skyblue",
        linewidth=2,
        markersize=8,
    )
    ax.axhline(
        y=average,
        color="red",
        linestyle="--",
        linewidth=2,
        label=f"參考價格: {average:.2f}",
    )
    ax.set_title(
        f'{platform} 電商網站上 "{keyword}" 的銷售價格',
        fontsize=20,
        fontweight="bold",
        color="navy",
    )
    ax.set_xlabel("商品索引", fontsize=14, color="gray")
    ax.set_ylabel("價格", fontsize=14, color="gray")
    ax.tick_params(axis="x", labelrotation=45, labelsize=12, labelcolor="gray")
    ax.tick_params(axis="y", labelsize=12, labelcolor="gray")
    ax.legend(fontsize=12, loc="upper left")
    ax.grid(axis="y", linestyle="--", alpha=0.5)
    ax.set_facecolor("#f8f8f8")
    fig.tight_layout()

    st.pyplot(fig)
    # Close explicitly so figures do not pile up in pyplot across reruns.
    plt.close(fig)


def show_platform(frame: pd.DataFrame, platform: str, keyword: str) -> None:
    """Show one platform's table, price statistics and price chart."""
    if frame.empty or "price" not in frame.columns:
        st.warning(f"{platform} 沒有找到商品")
        return

    st.write(f"{platform} 商品數據:", frame)

    # Coerce to numbers so a stray non-numeric value cannot break the stats.
    prices = pd.to_numeric(frame["price"], errors="coerce").dropna()
    if prices.empty:
        st.warning(f"{platform} 沒有可用的價格資料")
        return

    average = prices.mean()
    st.write(f"{platform} 平均價格: {average:.2f}")
    st.write(f"{platform} 最高價格: {prices.max():.2f}")
    st.write(f"{platform} 最低價格: {prices.min():.2f}")
    plot_prices(prices, platform, keyword, average)


def main() -> None:
    """Render the search form and, once submitted, the results for both shops."""
    st.title("MOMO & PCHOME 商品搜索和價格分析")

    search_keyword = st.text_input("請輸入要搜索的關鍵字: ", "筆電")
    page_number = st.number_input(
        "請輸入要搜索的頁數: ", min_value=1, max_value=100, value=1, step=1
    )

    # NOTE: st.button is only True on the run triggered by the click, so any
    # later widget interaction (including the download button) reruns the
    # script without these results.
    if not st.button("開始搜索"):
        return

    keyword = search_keyword.strip()
    if not keyword:
        st.warning("請輸入要搜索的關鍵字")
        return
    pages = int(page_number)

    start_time = time.perf_counter()

    # Register the font before either chart so both get CJK labels, even when
    # one of the platforms fails.
    ensure_font()

    momo_df = fetch_momo(keyword, pages)
    show_platform(momo_df, "MOMO", keyword)

    pchome_data = fetch_pchome(keyword, pages)
    show_platform(pchome_data, "PCHOME", keyword)

    # Merge whatever was retrieved; either platform may have failed.
    available = [frame for frame in (momo_df, pchome_data) if not frame.empty]
    if available:
        combined_data = pd.concat(available, ignore_index=True)
        # utf-8-sig adds a BOM so Excel detects UTF-8 and shows the Chinese
        # titles correctly.
        csv = combined_data.to_csv(index=False).encode("utf-8-sig")
        st.download_button(
            label="下載CSV檔案",
            data=csv,
            file_name=f"{keyword}_combined_data.csv",
            mime="text/csv",
        )

    elapsed = time.perf_counter() - start_time
    st.write(f"Execution time: {elapsed:.2f} seconds")


# Streamlit executes the script as "__main__", so this guard still runs the
# app under `streamlit run` while keeping the module importable.
if __name__ == "__main__":
    main()