Spaces:
Sleeping
Sleeping
naufalnashif
committed on
Commit
•
1a8d286
1
Parent(s):
c644de6
Update app.py
Browse files
app.py
CHANGED
@@ -253,7 +253,11 @@ with st.expander("Settings :"):
|
|
253 |
|
254 |
query = st.text_input("Masukkan Query :")
|
255 |
jumlah = st.number_input("Masukkan Estimasi Banyak Data :", min_value = 1, step = 1, placeholder="Type a number...")
|
256 |
-
|
|
|
|
|
|
|
|
|
257 |
colormap = st.selectbox("Pilih Warna Wordclouds :", ["Greys", "Purples", "Blues", "Greens", "Oranges", "Reds", "YlOrBr", "YlOrRd", "OrRd", "PuRd", "RdPu", "BuPu", "GnBu", "PuBu", "YlGnBu", "PuBuGn", "BuGn", "YlGn"])
|
258 |
words = st.number_input("Masukkan Jumlah Most Common Words :", min_value = 1, max_value = 15, step = 1, placeholder="Type a number...")
|
259 |
|
@@ -300,7 +304,7 @@ if selected_site == "CNBC Indonesia":
|
|
300 |
all_texts = ""
|
301 |
|
302 |
# Kolom pertama untuk Word Cloud
|
303 |
-
|
304 |
with columns[0]:
|
305 |
if results:
|
306 |
all_texts = [result[3] for result in results if result[3] is not None and not pd.isna(result[3])]
|
@@ -317,7 +321,8 @@ if selected_site == "CNBC Indonesia":
|
|
317 |
st.image(wordcloud.to_array())
|
318 |
|
319 |
|
320 |
-
|
|
|
321 |
with columns[1]:
|
322 |
st.subheader("Most Common Words")
|
323 |
|
@@ -336,8 +341,6 @@ if selected_site == "CNBC Indonesia":
|
|
336 |
|
337 |
st.pyplot(fig)
|
338 |
|
339 |
-
if not hidden_data:
|
340 |
-
st.warning(f"Tidak ada data pada query '{query}'", icon="⚠️")
|
341 |
|
342 |
#---------------------------------------------------Detik.com----------------------------------------------------------------------
|
343 |
|
@@ -373,55 +376,55 @@ elif selected_site == "Detik.com":
|
|
373 |
all_texts = ""
|
374 |
|
375 |
# Kolom pertama untuk Word Cloud
|
376 |
-
|
377 |
with columns[0]:
|
378 |
if results:
|
379 |
all_texts = [result[3] for result in results if result[3] is not None and not pd.isna(result[3])]
|
380 |
all_texts = " ".join(all_texts)
|
381 |
-
|
382 |
st.subheader("Word Cloud")
|
383 |
-
|
384 |
if all_texts:
|
385 |
wordcloud = WordCloud(width=800, height=610, background_color='white',
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
st.image(wordcloud.to_array())
|
391 |
-
|
392 |
-
|
393 |
-
|
|
|
394 |
with columns[1]:
|
395 |
st.subheader("Most Common Words")
|
396 |
-
|
397 |
if all_texts:
|
398 |
word_counts = Counter(all_texts.split())
|
399 |
most_common_words = word_counts.most_common(words)
|
400 |
-
|
401 |
words, counts = zip(*most_common_words)
|
402 |
-
|
403 |
fig, ax = plt.subplots(figsize=(10, 6))
|
404 |
ax.bar(words, counts)
|
405 |
ax.set_xlabel("Kata-kata")
|
406 |
ax.set_ylabel("Jumlah")
|
407 |
ax.set_title("Kata-kata Paling Umum")
|
408 |
ax.tick_params(axis='x', rotation=45)
|
409 |
-
|
410 |
st.pyplot(fig)
|
411 |
|
412 |
-
if not hidden_data:
|
413 |
-
st.warning(f"Tidak ada data pada query '{query}'", icon="⚠️")
|
414 |
-
|
415 |
#---------------------------------------------------Download File & Hasil Scraping----------------------------------------------------------------------
|
416 |
|
417 |
# Tampilkan hasil scraping
|
418 |
if scraping_done:
|
419 |
if hidden_data:
|
420 |
# Menampilkan hasil sentimen dalam kotak yang dapat diperluas
|
421 |
-
|
422 |
-
st.
|
423 |
-
|
424 |
-
|
|
|
|
|
425 |
if download_format == "XLSX":
|
426 |
df = pd.DataFrame(hidden_data, columns=["date", "judul-berita", "link-berita"])
|
427 |
df_prep = pd.DataFrame(results, columns=["Teks", "Cleaned Text", "Norm Text", "Tanpa Stopwords"])
|
@@ -446,7 +449,8 @@ if scraping_done:
|
|
446 |
|
447 |
st.download_button(label=f"Unduh Hasil Scraping TXT ({len(hidden_data)} data)", data=text_data, key="txt_download", file_name=f"hasil_scraping_{query}.txt")
|
448 |
|
449 |
-
|
|
|
450 |
if not scraping_done:
|
451 |
st.write("Tidak ada data untuk diunduh.")
|
452 |
|
|
|
253 |
|
254 |
query = st.text_input("Masukkan Query :")
|
255 |
jumlah = st.number_input("Masukkan Estimasi Banyak Data :", min_value = 1, step = 1, placeholder="Type a number...")
|
256 |
+
# Membuat multiselect box
|
257 |
+
selected_options = st.multiselect(
|
258 |
+
'Pilih tampilan:',
|
259 |
+
['Hasil Scraping', 'Hasil Preprocessing', 'Word Clouds', 'Most Common Words']
|
260 |
+
)
|
261 |
colormap = st.selectbox("Pilih Warna Wordclouds :", ["Greys", "Purples", "Blues", "Greens", "Oranges", "Reds", "YlOrBr", "YlOrRd", "OrRd", "PuRd", "RdPu", "BuPu", "GnBu", "PuBu", "YlGnBu", "PuBuGn", "BuGn", "YlGn"])
|
262 |
words = st.number_input("Masukkan Jumlah Most Common Words :", min_value = 1, max_value = 15, step = 1, placeholder="Type a number...")
|
263 |
|
|
|
304 |
all_texts = ""
|
305 |
|
306 |
# Kolom pertama untuk Word Cloud
|
307 |
+
if 'Word Clouds' in selected_options:
|
308 |
with columns[0]:
|
309 |
if results:
|
310 |
all_texts = [result[3] for result in results if result[3] is not None and not pd.isna(result[3])]
|
|
|
321 |
st.image(wordcloud.to_array())
|
322 |
|
323 |
|
324 |
+
# Kolom kedua untuk Most Common Words
|
325 |
+
if 'Most Common Words' in selected_options:
|
326 |
with columns[1]:
|
327 |
st.subheader("Most Common Words")
|
328 |
|
|
|
341 |
|
342 |
st.pyplot(fig)
|
343 |
|
|
|
|
|
344 |
|
345 |
#---------------------------------------------------Detik.com----------------------------------------------------------------------
|
346 |
|
|
|
376 |
all_texts = ""
|
377 |
|
378 |
# Kolom pertama untuk Word Cloud
|
379 |
+
if 'Word Clouds' in selected_options:
|
380 |
with columns[0]:
|
381 |
if results:
|
382 |
all_texts = [result[3] for result in results if result[3] is not None and not pd.isna(result[3])]
|
383 |
all_texts = " ".join(all_texts)
|
384 |
+
|
385 |
st.subheader("Word Cloud")
|
386 |
+
|
387 |
if all_texts:
|
388 |
wordcloud = WordCloud(width=800, height=610, background_color='white',
|
389 |
+
colormap=colormap,
|
390 |
+
contour_color='black',
|
391 |
+
contour_width=2,
|
392 |
+
mask=None).generate(all_texts)
|
393 |
st.image(wordcloud.to_array())
|
394 |
+
|
395 |
+
|
396 |
+
# Kolom kedua untuk Most Common Words
|
397 |
+
if 'Most Common Words' in selected_options:
|
398 |
with columns[1]:
|
399 |
st.subheader("Most Common Words")
|
400 |
+
|
401 |
if all_texts:
|
402 |
word_counts = Counter(all_texts.split())
|
403 |
most_common_words = word_counts.most_common(words)
|
404 |
+
|
405 |
words, counts = zip(*most_common_words)
|
406 |
+
|
407 |
fig, ax = plt.subplots(figsize=(10, 6))
|
408 |
ax.bar(words, counts)
|
409 |
ax.set_xlabel("Kata-kata")
|
410 |
ax.set_ylabel("Jumlah")
|
411 |
ax.set_title("Kata-kata Paling Umum")
|
412 |
ax.tick_params(axis='x', rotation=45)
|
413 |
+
|
414 |
st.pyplot(fig)
|
415 |
|
|
|
|
|
|
|
416 |
#---------------------------------------------------Download File & Hasil Scraping----------------------------------------------------------------------
|
417 |
|
418 |
# Tampilkan hasil scraping
|
419 |
if scraping_done:
|
420 |
if hidden_data:
|
421 |
# Menampilkan hasil sentimen dalam kotak yang dapat diperluas
|
422 |
+
if 'Hasil Preprocessing' in selected_options:
|
423 |
+
with st.expander(f"Hasil Scraping {selected_site} :"):
|
424 |
+
st.write(pd.DataFrame(hidden_data, columns=["date", "judul-berita", "link-berita"]))
|
425 |
+
if 'Hasil Preprocessing' in selected_options:
|
426 |
+
with st.expander(f"Hasil Cleaning Data :"):
|
427 |
+
st.write(pd.DataFrame(results, columns=["Teks", "Cleaned Text", "Norm Text", "Tanpa Stopwords"]))
|
428 |
if download_format == "XLSX":
|
429 |
df = pd.DataFrame(hidden_data, columns=["date", "judul-berita", "link-berita"])
|
430 |
df_prep = pd.DataFrame(results, columns=["Teks", "Cleaned Text", "Norm Text", "Tanpa Stopwords"])
|
|
|
449 |
|
450 |
st.download_button(label=f"Unduh Hasil Scraping TXT ({len(hidden_data)} data)", data=text_data, key="txt_download", file_name=f"hasil_scraping_{query}.txt")
|
451 |
|
452 |
+
if not hidden_data:
|
453 |
+
st.warning(f"Tidak ada data pada query '{query}'", icon="⚠️")
|
454 |
if not scraping_done:
|
455 |
st.write("Tidak ada data untuk diunduh.")
|
456 |
|