|
import io
import re
from datetime import datetime

import pandas as pd
import pytz
import streamlit as st
from google_play_scraper import app, Sort, reviews, reviews_all, permissions, search
|
|
|
|
|
|
|
@st.cache_data
def get_url_by_app_name(nama_apl):
    """Return the Play Store URL for a known application name.

    Parameters:
    - nama_apl (str): Application name to look up (e.g. 'Shopee').

    Returns:
    - str or None: The app's Play Store URL, or None when the name is
      not one of the supported defaults.
    """
    # Map each supported app directly to its store URL. The former
    # list_url indirection added nothing, and the old docstring
    # documented an `aplikasi_dict` parameter that never existed.
    aplikasi_dict = {
        'Shopee': 'https://play.google.com/store/apps/details?id=com.shopee.id',
        'Tokopedia': 'https://play.google.com/store/apps/details?id=com.tokopedia.tkpd',
        'Amazon': 'https://play.google.com/store/apps/details?id=com.amazon.mShop.android.shopping',
        'Grab': 'https://play.google.com/store/apps/details?id=com.grabtaxi.passenger',
    }
    return aplikasi_dict.get(nama_apl)
|
|
|
@st.cache_data
def extract_app_id(play_store_url):
    """Extract the package id from a Play Store URL.

    Reads the value of the `id=` query parameter (letters, digits, dots
    and underscores) and returns it, or None when the URL carries no
    such parameter.
    """
    found = re.search(r'id=([a-zA-Z0-9._]+)', play_store_url)
    return found.group(1) if found else None
|
@st.cache_data(show_spinner = 'On progress, please wait...')
def scraping_func(app_id, bahasa, negara, filter_score, jumlah):
    """Scrape up to `jumlah` of the newest reviews for one app.

    Returns the fetched review dicts, the continuation token from the
    scraper, and a flag telling whether anything was fetched.
    """
    # "Semua Rating" is the UI sentinel meaning "no rating filter".
    score_filter = None if filter_score == "Semua Rating" else filter_score

    hasil, token = reviews(
        app_id,
        lang=bahasa,
        country=negara,
        sort=Sort.NEWEST,
        filter_score_with=score_filter,
        count=jumlah,
    )

    return hasil, token, bool(hasil)
|
|
|
@st.cache_data(show_spinner = 'On progress, please wait...')
def scraping_all_func(app_id, bahasa, negara, filter_score, sleep = 0):
    """Scrape every available review for one app (can be millions).

    `sleep` is the per-request pause in milliseconds passed through to
    the scraper. Returns the review dicts and a flag telling whether
    anything was fetched.
    """
    # "Semua Rating" is the UI sentinel meaning "no rating filter".
    score_filter = None if filter_score == "Semua Rating" else filter_score

    hasil = reviews_all(
        app_id,
        sleep_milliseconds=sleep,
        lang=bahasa,
        country=negara,
        filter_score_with=score_filter,
    )

    return hasil, bool(hasil)
|
|
|
@st.cache_data
def buat_chart(df, target_year):
    """Render a stacked bar chart of review counts per month and score.

    Parameters:
    - df (pd.DataFrame): Scraped reviews; must contain an 'at'
      (datetime-like) column and a 'score' (1-5) column.
    - target_year (int): Year to plot. Shows a warning and returns early
      when that year has no data.
    """
    st.write(f"Bar Chart Tahun {target_year}:")

    # Work on a copy: the original mutated the caller's DataFrame in
    # place (adding 'month'/'year' columns) inside a cached function.
    df = df.copy()
    df['at'] = pd.to_datetime(df['at'])
    df['month'] = df['at'].dt.month
    df['year'] = df['at'].dt.year

    # Copy the filtered slice too, so the assignments below do not hit
    # pandas' SettingWithCopyWarning (writes into a view may be lost).
    df_filtered = df[df['year'] == target_year].copy()
    if df_filtered.empty:
        st.warning(f"Tidak ada data untuk tahun {target_year}.")
        return

    # Indonesian month labels; the same table provides both the
    # number -> label mapping and the category order (the original
    # maintained two parallel, duplicated literals).
    nama_bulan = ['Januari', 'Februari', 'Maret', 'April', 'Mei', 'Juni',
                  'Juli', 'Agustus', 'September', 'Oktober', 'November', 'Desember']
    bulan_mapping = {i + 1: f'{nama} {target_year}' for i, nama in enumerate(nama_bulan)}
    months_order = [bulan_mapping[i] for i in range(1, 13)]

    df_filtered['month'] = df_filtered['month'].replace(bulan_mapping)
    df_filtered['month'] = pd.Categorical(df_filtered['month'], categories=months_order, ordered=True)
    df_filtered = df_filtered.sort_values('month')

    # One pastel color per rating. groupby(...).unstack() yields one
    # column per score in ascending order, matching the sorted unique
    # scores used to build the color list.
    warna_score = {
        1: '#FF9AA2',
        2: '#FFB7B2',
        3: '#FFDAC1',
        4: '#E2F0CB',
        5: '#B5EAD7'
    }
    unique_scores = sorted(df_filtered['score'].unique())

    st.bar_chart(
        df_filtered.groupby(['month', 'score']).size().unstack().fillna(0),
        color=[warna_score[score] for score in unique_scores]
    )
|
|
|
# Current time in Jakarta (WIB), built from an aware UTC "now" so the
# conversion is explicit and DST-safe.
wib_timezone = pytz.timezone('Asia/Jakarta')
dateNow = datetime.now(pytz.timezone('UTC')).astimezone(wib_timezone)

# Preformatted pieces reused by the sidebar header and the page footer.
dateSimple = dateNow.strftime("%A, %d %b %Y")
timeNow = dateNow.strftime("%H:%M WIB")
yearNow = dateNow.strftime("%Y")
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Sidebar: scraping parameters and the trigger button.
# NOTE(review): the `reviews` variable assigned below shadows the imported
# google_play_scraper.reviews function. This only works because Streamlit
# re-runs the whole script (re-importing) on every interaction — consider
# renaming the result variable.
# ---------------------------------------------------------------------------
st.title("Data Everywhere : Scraping Playstore Reviews")
scraping_done = False  # flipped to True after a successful scrape; read by the Results tab
with st.sidebar :
    st.text(f"Today\t: {dateSimple}")
    st.text(f"Time\t: {timeNow}")
    with st.expander("Scraping Settings :"):
        # "Semua Reviews" fetches everything; "Estimasi Data" caps the count.
        scrape = st.selectbox("PIlih Metode :", ("Semua Reviews", "Estimasi Data"), index = 1)
        aplikasi = st.radio(
            "Pilih Input :",
            ["Defaults", "Custom URL"], index = 0,
            captions = ["Shopee, Tokopedia, Amazon, Grab", "Tambahkan URL Manual"])
        if aplikasi == "Defaults" :
            nama_apl = st.selectbox("Pilih Aplikasi :", ('Shopee', 'Tokopedia', 'Amazon', 'Grab'))
            if nama_apl :
                url = get_url_by_app_name(nama_apl)
        elif aplikasi == "Custom URL":
            url = st.text_input("Masukkan URL Aplikasi Pada Web Playstore :", 'https://play.google.com/store/apps/details?id=com.shopee.id')
        if scrape == "Estimasi Data" :
            # `jumlah` only exists in this branch; the "Semua Reviews"
            # branch defines `sleep` instead (see below). Each is consumed
            # only by its matching branch of the scraping trigger.
            jumlah = st.number_input("Masukkan Estimasi Banyak Data :", min_value = 10, max_value = 25000, step = 10, placeholder="Type a number...")
    with st.expander("Preference Settings :"):
        if scrape == "Semua Reviews" :
            sleep = st.number_input("Masukkan sleep (milisecond) :", min_value = 1, max_value = 1000, step = 10, placeholder="Type a number...")
        bahasa = st.selectbox("Pilih Bahasa:", ('en', 'id'))
        negara = st.selectbox("Pilih Negara :", ('us', 'id'))
        filter_score = st.selectbox("Pilih Rating :", ('Semua Rating', 1, 2, 3, 4, 5))
        target_year = st.selectbox("Pilih Tahun Bar Chart :", (2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025), index = 7)
        download_format = st.selectbox("Pilih Format Unduhan :", ["XLSX", "CSV", "JSON"])
    st.info('Tekan "Mulai Scraping" kembali jika tampilan menghilang ', icon="ℹ️")

# Trigger: runs one of the two cached scraping functions and materializes
# the result as a DataFrame for the Results tab.
if url and bahasa and negara and filter_score and download_format:
    if st.button ("Mulai Scraping") :
        app_id = extract_app_id(url)
        if scrape == "Semua Reviews" :
            # NOTE(review): shadows the imported `reviews` function (see header note).
            reviews, scraping_done = scraping_all_func(app_id, bahasa, negara, filter_score, sleep)
            df = pd.DataFrame(reviews)
        elif scrape == "Estimasi Data":
            reviews, token, scraping_done = scraping_func(app_id, bahasa, negara, filter_score, jumlah)
            df = pd.DataFrame(reviews)
        else :
            st.warning("Masukkan pilihan yang valid")
else :
    st.error("Mohon Masukkan Parameter.")
|
|
|
# Four content tabs; tab2 is the only dynamic one (depends on scrape results).
tab1, tab2, tab3, tab4 = st.tabs(["📋 User Guide", "📈 Results", "🤵 Creator", "🔍 More"])
with tab1:
    @st.cache_resource
    def tab_1():
        # Static user guide. The bare string literal below is rendered by
        # Streamlit's "magic" (auto st.write of bare expressions) — it is
        # displayed content, not a docstring, so its text must stay as-is.
        st.header("User Guide:")
        '''
        Langkah - langkah :
        1. Buka sidebar sebelah kiri
        2. Buka Scraping Settings
        3. Hati - hati jika menggunakan "Semua Reviews" karena bisa berjumlah jutaan data
        4. Masukkan URL app pada situs playstore
        5. Sesuaikan bahasa, negara, dan rating yang akan diambil
        6. Pilih tahun bar chart
        7. Pilih format unduhan
        8. Klik "Mulai Scraping"
        9. Buka tab Results
        '''
    tab_1()
|
|
|
|
|
with tab2:
    st.header("Results:")

    # `app_id`, `df` and `reviews` are bound by the sidebar trigger whenever
    # `scraping_done` is True, so they are safe to use inside this branch.
    if scraping_done:
        with st.expander(f"Hasil Scraping {app_id}:"):
            buat_chart(df, target_year)
            st.write(df)

        if download_format == "XLSX":
            # Excel rejects control characters, so strip non-printable
            # characters cell by cell before exporting.
            cleaned_data = df.applymap(lambda x: "".join(char for char in str(x) if char.isprintable()))

            # Build the workbook in memory instead of writing a temp file to
            # disk and re-reading it through an open() handle that was never
            # closed (resource leak in the original).
            excel_buffer = io.BytesIO()
            cleaned_data.to_excel(excel_buffer, index=False)

            st.download_button(label=f"Unduh XLSX ({len(reviews)} data)", data=excel_buffer.getvalue(), key="xlsx_download", file_name=f"hasil_scraping_{app_id}.xlsx")

        elif download_format == "CSV":
            csv = df.to_csv(index=False)

            st.download_button(label=f"Unduh CSV ({len(reviews)} data)", data=csv, key="csv_download", file_name=f"hasil_scraping_{app_id}.csv")

        elif download_format == "JSON":
            json_data = df.to_json(orient="records")

            st.download_button(label=f"Unduh JSON ({len(reviews)} data)", data=json_data, key="json_download", file_name=f"hasil_scraping_{app_id}.json")

    else:
        st.info("Tidak ada data")
|
|
|
with tab3:
    @st.cache_resource
    def tab_3():
        """Render the static 'Creator' tab: profile photo and contact links."""
        st.header("Profile:")
        st.image('https://raw.githubusercontent.com/naufalnashif/naufalnashif.github.io/main/assets/img/my-profile-sidang-idCard-crop.JPG', caption='Naufal Nashif')
        st.subheader('Hello, nice to meet you !')

        # Label/URL pairs, each rendered as a "Label: [url](url)" markdown line.
        kontak = [
            ("GitHub", "https://github.com/naufalnashif/"),
            ("Instagram", "https://www.instagram.com/naufal.nashif/"),
            ("Website", "https://naufalnashif.netlify.app/"),
        ]
        for label, tautan in kontak:
            st.markdown(f"{label}: [{tautan}]({tautan})")
    tab_3()
|
|
|
with tab4:
    @st.cache_resource
    def tab_4():
        """Render the static 'More' tab: three columns of other demo apps."""
        st.header("More:")

        # (screenshot URL, caption, project link) for each showcased app.
        proyek = [
            ('https://raw.githubusercontent.com/naufalnashif/huggingface-repo/main/assets/img/sentiment-analysis-biskita.png',
             'Sentiment Analysis Web App',
             "https://huggingface.co/spaces/naufalnashif/sentiment-analysis-ensemble-model"),
            ('https://raw.githubusercontent.com/naufalnashif/huggingface-repo/main/assets/img/scraping-news-headline.png',
             'Scraping News Headline',
             "https://huggingface.co/spaces/naufalnashif/scraping-news-headline"),
            ('https://raw.githubusercontent.com/naufalnashif/huggingface-repo/main/assets/img/scraping-ecommerce.png',
             'Scraping Ecommerce Product',
             "https://huggingface.co/spaces/naufalnashif/scraping-ecommerce-2023"),
        ]

        # One column per project: screenshot with caption, then the link.
        for kolom, (gambar, judul, tautan) in zip(st.columns(3), proyek):
            with kolom:
                st.image(gambar, caption = judul)
                st.markdown(f"[{tautan}]({tautan})")
    tab_4()
|
|
|
|
|
# Footer shown beneath every tab; `yearNow` comes from the WIB clock above.
st.divider()
st.write('Thank you for trying the demo!')
st.caption(f'Made with ❤️ by :blue[Naufal Nashif] ©️ {yearNow}')
|
|