Commit
·
2e68b49
1
Parent(s):
36a2c5c
Update app.py
Browse files
app.py
CHANGED
@@ -287,101 +287,123 @@ def get_table_download_link(df, download_format):
|
|
287 |
st.title("Sentiment Analysis : Based on Tweets Biskita Transpakuan Bogor 2022-2023")
|
288 |
preference_barchart_date = False
|
289 |
#-----------------------------------------------------General Settings---------------------------------------------------------------
|
290 |
-
with
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
308 |
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
if 'Text' not in df.columns:
|
313 |
-
st.warning("Berkas XLSX harus memiliki kolom bernama 'Text' untuk analisis sentimen.")
|
314 |
-
if not df['Text'].empty:
|
315 |
-
st.warning("Kolom 'Text' harus mempunyai value.")
|
316 |
-
else:
|
317 |
-
texts = df['Text'] # Sesuaikan dengan nama kolom di berkas XLSX Anda
|
318 |
-
if "Date" in df.columns :
|
319 |
-
if not df['Date'].empty:
|
320 |
-
dates = df['Date']
|
321 |
-
preference_barchart_date = True
|
322 |
-
|
323 |
-
#-----------------------------------------------------Preference Settings--------------------------------------------------
|
324 |
-
with st.expander ("Preference Settings :"):
|
325 |
-
colormap = st.selectbox("Pilih Warna Wordclouds :", ["Greys", "Purples", "Blues", "Greens", "Oranges", "Reds", "YlOrBr", "YlOrRd", "OrRd", "PuRd", "RdPu", "BuPu", "GnBu", "PuBu", "YlGnBu", "PuBuGn", "BuGn", "YlGn"])
|
326 |
-
if preference_barchart_date == True:
|
327 |
-
bar = st.selectbox("Pilih Tampilan Bar Chart :", ("Distribusi Kelas", "Distribusi Kelas Berdasarkan Waktu"), index = 0)
|
328 |
-
target_year = st.selectbox("Pilih Tahun Bar Chart :", df['Date'].str[:4].unique())
|
329 |
-
# Analisis sentimen
|
330 |
-
results = []
|
331 |
-
analisis = False
|
332 |
-
if input_option == "Teks Manual" and user_input:
|
333 |
-
if st.button("Analysis"):
|
334 |
-
# Pisahkan teks yang dimasukkan pengguna menjadi baris-baris terpisah
|
335 |
-
user_texts = user_input.split('\n')
|
336 |
-
for text in user_texts:
|
337 |
-
sentiment_label = predict_sentiment(text, sentiment_model, tfidf_vectorizer, lookp_dict)
|
338 |
-
emoticon = get_emoticon(sentiment_label)
|
339 |
-
cleaned_text = clean_text(text)
|
340 |
-
norm_slang_text = normalize_slang(cleaned_text, lookp_dict)
|
341 |
-
tanpa_stopwords = remove_stopwords(norm_slang_text, stop_words)
|
342 |
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
'label' : sentiment_label,
|
349 |
-
'emotikon' : emoticon,
|
350 |
-
})
|
351 |
-
analisis = True
|
352 |
-
|
353 |
-
elif input_option == "Unggah Berkas XLSX" and uploaded_file is not None:
|
354 |
-
if st.button("Analysis"):
|
355 |
-
results, analisis = all_data_process(texts, df, sentiment_model, tfidf_vectorizer, lookp_dict, stop_words)
|
356 |
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
|
|
|
|
|
|
|
|
|
|
|
368 |
|
369 |
-
if
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
st.image(wordcloud.to_array())
|
377 |
-
else:
|
378 |
-
st.write("Tidak ada data untuk ditampilkan dalam Word Cloud.")
|
379 |
-
|
380 |
-
if 'Date' in df_results.columns:
|
381 |
-
if bar == "Distribusi Kelas Berdasarkan Waktu":
|
382 |
-
if not df_results['Date'].empty:
|
383 |
with columns[1]:
|
384 |
-
|
|
|
|
|
|
|
|
|
385 |
else :
|
386 |
# Kolom kedua untuk Bar Chart
|
387 |
with columns[1]:
|
@@ -390,36 +412,28 @@ if results and analisis == True:
|
|
390 |
st.bar_chart(
|
391 |
df_results["label"].value_counts()
|
392 |
)
|
393 |
-
|
394 |
-
|
395 |
-
|
396 |
-
st.write(
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
)
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
# Simpan DataFrame ke dalam file CSV
|
408 |
-
df = pd.DataFrame(results)
|
409 |
-
csv = df.to_csv(index=False)
|
410 |
-
|
411 |
-
# Tampilkan tombol unduh CSV
|
412 |
-
st.download_button(label="Unduh CSV", data=csv, key="csv_download", file_name="hasil_sentimen.csv")
|
413 |
-
else:
|
414 |
-
st.write("Tidak ada data untuk diunduh.")
|
415 |
|
416 |
|
417 |
# Garis pemisah
|
418 |
st.divider()
|
419 |
-
left, right = st.columns(
|
420 |
|
421 |
with left :
|
422 |
-
st.image(https://naufalnashif.github.io/assets/images/WhatsApp%20Image%202023-01-26%20at%2020.37.17.jpeg, caption='Naufal Nashif')
|
423 |
|
424 |
with right :
|
425 |
# Tautan ke GitHub
|
|
|
287 |
st.title("Sentiment Analysis : Based on Tweets Biskita Transpakuan Bogor 2022-2023")
|
288 |
preference_barchart_date = False
|
289 |
#-----------------------------------------------------General Settings---------------------------------------------------------------
|
290 |
+
with st.sidebar :
|
291 |
+
with st.expander("General Settings :"):
|
292 |
+
# Tambahkan widget untuk memilih model
|
293 |
+
selected_model = st.selectbox("Pilih Model Sentimen:", ("Ensemble", "Naive Bayes", "Logistic Regression", "Transformer"))
|
294 |
+
|
295 |
+
# Memilih model sentimen berdasarkan pilihan pengguna
|
296 |
+
sentiment_model = select_sentiment_model(selected_model)
|
297 |
+
|
298 |
+
# Pilihan input teks manual atau berkas XLSX
|
299 |
+
input_option = st.radio("Pilih metode input:", ("Teks Manual", "Unggah Berkas XLSX"))
|
300 |
+
|
301 |
+
if input_option == "Teks Manual":
|
302 |
+
# Input teks dari pengguna
|
303 |
+
user_input = st.text_area("Masukkan teks:", "")
|
304 |
+
else:
|
305 |
+
# Input berkas XLSX
|
306 |
+
uploaded_file = st.file_uploader("Unggah berkas XLSX", type=["xlsx"])
|
307 |
+
st.caption("Pastikan berkas XLSX Anda memiliki kolom yang bernama :blue[Text] _(Maks.500 data)_.")
|
308 |
+
st.caption("Jika terdapat kolom type :blue[datetime], ganti nama kolom menjadi :blue[Date]")
|
309 |
+
|
310 |
+
if uploaded_file is not None:
|
311 |
+
df = pd.read_excel(uploaded_file)
|
312 |
+
df = df[:500]
|
313 |
+
if 'Text' not in df.columns:
|
314 |
+
st.warning("Berkas XLSX harus memiliki kolom bernama 'Text' untuk analisis sentimen.")
|
315 |
+
if not df['Text'].empty:
|
316 |
+
st.warning("Kolom 'Text' harus mempunyai value.")
|
317 |
+
else:
|
318 |
+
texts = df['Text'] # Sesuaikan dengan nama kolom di berkas XLSX Anda
|
319 |
+
if "Date" in df.columns :
|
320 |
+
if not df['Date'].empty:
|
321 |
+
dates = df['Date']
|
322 |
+
preference_barchart_date = True
|
323 |
+
|
324 |
+
#-----------------------------------------------------Preference Settings--------------------------------------------------
|
325 |
+
with st.expander ("Preference Settings :"):
|
326 |
+
colormap = st.selectbox("Pilih Warna Wordclouds :", ["Greys", "Purples", "Blues", "Greens", "Oranges", "Reds", "YlOrBr", "YlOrRd", "OrRd", "PuRd", "RdPu", "BuPu", "GnBu", "PuBu", "YlGnBu", "PuBuGn", "BuGn", "YlGn"])
|
327 |
+
if preference_barchart_date == True:
|
328 |
+
bar = st.selectbox("Pilih Tampilan Bar Chart :", ("Distribusi Kelas", "Distribusi Kelas Berdasarkan Waktu"), index = 0)
|
329 |
+
target_year = st.selectbox("Pilih Tahun Bar Chart :", df['Date'].astype(str).str[:4].unique())
|
330 |
+
|
331 |
+
tab1, tab2, tab3 = st.tabs(["Profile", "Documentation", "Results"])
|
332 |
+
|
333 |
+
with tab1:
|
334 |
+
st.header("Profile :")
|
335 |
+
st.image('https://naufalnashif.github.io/assets/images/WhatsApp%20Image%202023-01-26%20at%2020.37.17.jpeg', caption='Naufal Nashif')
|
336 |
+
|
337 |
+
with tab2:
|
338 |
+
st.header("Documentation :")
|
339 |
+
|
340 |
+
|
341 |
+
with tab3:
|
342 |
+
st.header("Results :")
|
343 |
+
# Analisis sentimen
|
344 |
+
results = []
|
345 |
+
analisis = False
|
346 |
+
if input_option == "Teks Manual" and user_input:
|
347 |
+
if st.button("Analysis"):
|
348 |
+
# Pisahkan teks yang dimasukkan pengguna menjadi baris-baris terpisah
|
349 |
+
user_texts = user_input.split('\n')
|
350 |
+
for text in user_texts:
|
351 |
+
sentiment_label = predict_sentiment(text, sentiment_model, tfidf_vectorizer, lookp_dict)
|
352 |
+
emoticon = get_emoticon(sentiment_label)
|
353 |
+
cleaned_text = clean_text(text)
|
354 |
+
norm_slang_text = normalize_slang(cleaned_text, lookp_dict)
|
355 |
+
tanpa_stopwords = remove_stopwords(norm_slang_text, stop_words)
|
356 |
+
|
357 |
+
results.append({
|
358 |
+
'Text': text,
|
359 |
+
'cleaned-text' : cleaned_text,
|
360 |
+
'normalisasi-text' : norm_slang_text,
|
361 |
+
'stopwords-remove' : tanpa_stopwords,
|
362 |
+
'label' : sentiment_label,
|
363 |
+
'emotikon' : emoticon,
|
364 |
+
})
|
365 |
+
analisis = True
|
366 |
|
367 |
+
elif input_option == "Unggah Berkas XLSX" and uploaded_file is not None:
|
368 |
+
if st.button("Analysis"):
|
369 |
+
results, analisis = all_data_process(texts, df, sentiment_model, tfidf_vectorizer, lookp_dict, stop_words)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
370 |
|
371 |
+
st.info('Tekan "Analysis" kembali jika tampilan menghilang', icon = 'ℹ️')
|
372 |
+
if results and analisis == True:
|
373 |
+
df_results = pd.DataFrame(results)
|
374 |
+
# Membagi tampilan menjadi dua kolom
|
375 |
+
columns = st.columns(2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
376 |
|
377 |
+
# Kolom pertama untuk Word Cloud
|
378 |
+
with columns[0]:
|
379 |
+
st.write("Wordclouds :")
|
380 |
+
all_texts = [result['stopwords-remove'] for result in results if result['stopwords-remove'] is not None and not pd.isna(result['stopwords-remove'])]
|
381 |
+
all_texts = " ".join(all_texts)
|
382 |
+
|
383 |
+
if all_texts:
|
384 |
+
wordcloud = WordCloud(width=800, height=660, background_color='white',
|
385 |
+
colormap=colormap, # Warna huruf
|
386 |
+
contour_color='black', # Warna kontur
|
387 |
+
contour_width=2, # Lebar kontur
|
388 |
+
mask=None, # Gunakan mask untuk bentuk kustom
|
389 |
+
).generate(all_texts)
|
390 |
+
st.image(wordcloud.to_array())
|
391 |
+
else:
|
392 |
+
st.write("Tidak ada data untuk ditampilkan dalam Word Cloud.")
|
393 |
|
394 |
+
if 'Date' in df_results.columns:
|
395 |
+
if bar == "Distribusi Kelas Berdasarkan Waktu":
|
396 |
+
if not df_results['Date'].empty:
|
397 |
+
with columns[1]:
|
398 |
+
buat_chart(df_results, target_year)
|
399 |
+
else :
|
400 |
+
# Kolom kedua untuk Bar Chart
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
401 |
with columns[1]:
|
402 |
+
st.write("Bar Chart :")
|
403 |
+
# Membuat bar chart
|
404 |
+
st.bar_chart(
|
405 |
+
df_results["label"].value_counts()
|
406 |
+
)
|
407 |
else :
|
408 |
# Kolom kedua untuk Bar Chart
|
409 |
with columns[1]:
|
|
|
412 |
st.bar_chart(
|
413 |
df_results["label"].value_counts()
|
414 |
)
|
415 |
+
# Menampilkan hasil analisis sentimen dalam kotak yang dapat diperluas
|
416 |
+
with st.expander("Hasil Analisis Sentimen"):
|
417 |
+
# Tampilkan tabel hasil analisis sentimen
|
418 |
+
st.write(pd.DataFrame(results))
|
419 |
+
|
420 |
+
if results:
|
421 |
+
# Simpan DataFrame ke dalam file CSV
|
422 |
+
df = pd.DataFrame(results)
|
423 |
+
csv = df.to_csv(index=False)
|
424 |
+
|
425 |
+
# Tampilkan tombol unduh CSV
|
426 |
+
st.download_button(label="Unduh CSV", data=csv, key="csv_download", file_name="hasil_sentimen.csv")
|
427 |
+
else:
|
428 |
+
st.write("Tidak ada data untuk diunduh.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
429 |
|
430 |
|
431 |
# Garis pemisah
|
432 |
st.divider()
|
433 |
+
left, right = st.columns([1,3])
|
434 |
|
435 |
with left :
|
436 |
+
st.image('https://naufalnashif.github.io/assets/images/WhatsApp%20Image%202023-01-26%20at%2020.37.17.jpeg', caption='Naufal Nashif')
|
437 |
|
438 |
with right :
|
439 |
# Tautan ke GitHub
|