Commit
·
1d246f3
1
Parent(s):
002bcee
Update app.py
Browse files
app.py
CHANGED
@@ -78,6 +78,21 @@ def normalize_slang(text, slang_dict):
|
|
78 |
normalized_words = [slang_dict.get(word, word) for word in words]
|
79 |
return ' '.join(normalized_words)
|
80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
# Fungsi untuk ekstraksi fitur TF-IDF
|
82 |
def extract_tfidf_features(texts, tfidf_vectorizer):
|
83 |
tfidf_matrix = tfidf_vectorizer.transform(texts)
|
@@ -87,6 +102,8 @@ def extract_tfidf_features(texts, tfidf_vectorizer):
|
|
87 |
tfidf_model_path = 'X_tfidf_model.joblib'
|
88 |
tfidf_vectorizer = joblib.load(tfidf_model_path)
|
89 |
|
|
|
|
|
90 |
# Fungsi untuk memilih model berdasarkan pilihan pengguna
|
91 |
def select_sentiment_model(selected_model):
|
92 |
if selected_model == "Ensemble":
|
@@ -134,6 +151,68 @@ def get_emoticon(sentiment):
|
|
134 |
|
135 |
return emoticon
|
136 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
137 |
# Fungsi untuk membuat tautan unduhan
|
138 |
def get_table_download_link(df, download_format):
|
139 |
if download_format == "XLSX":
|
@@ -146,32 +225,39 @@ def get_table_download_link(df, download_format):
|
|
146 |
|
147 |
# Judul
|
148 |
st.title("Analisis Sentimen Based on Tweets Biskita Transpakuan")
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
# Analisis sentimen
|
176 |
results = []
|
177 |
analisis = False
|
@@ -183,12 +269,16 @@ if st.button("Analysis") and input_option == "Teks Manual" and user_input:
|
|
183 |
emoticon = get_emoticon(sentiment_label)
|
184 |
cleaned_text = clean_text(text)
|
185 |
norm_slang_text = normalize_slang(cleaned_text, lookp_dict)
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
|
|
|
|
|
|
|
|
192 |
analisis = True
|
193 |
|
194 |
elif input_option == "Unggah Berkas XLSX" and uploaded_file is not None:
|
@@ -198,7 +288,26 @@ elif input_option == "Unggah Berkas XLSX" and uploaded_file is not None:
|
|
198 |
emoticon = get_emoticon(sentiment_label)
|
199 |
cleaned_text = clean_text(text)
|
200 |
norm_slang_text = normalize_slang(cleaned_text, lookp_dict)
|
201 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
202 |
analisis = True
|
203 |
|
204 |
else:
|
@@ -207,58 +316,61 @@ elif input_option == "Unggah Berkas XLSX" and uploaded_file is not None:
|
|
207 |
|
208 |
st.info('Tekan "Analysis" kembali jika tampilan menghilang', icon = 'ℹ️')
|
209 |
if results and analisis == True:
|
|
|
210 |
# Membagi tampilan menjadi dua kolom
|
211 |
columns = st.columns(2)
|
212 |
|
213 |
# Kolom pertama untuk Word Cloud
|
214 |
with columns[0]:
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
st.subheader("Word Cloud")
|
220 |
-
|
221 |
-
if all_texts:
|
222 |
-
wordcloud = WordCloud(width=800, height=660, background_color='white',
|
223 |
-
colormap='Purples', # Warna huruf
|
224 |
-
contour_color='black', # Warna kontur
|
225 |
-
contour_width=2, # Lebar kontur
|
226 |
-
mask=None, # Gunakan mask untuk bentuk kustom
|
227 |
-
).generate(all_texts)
|
228 |
-
st.image(wordcloud.to_array())
|
229 |
-
else:
|
230 |
-
st.write("Tidak ada data untuk ditampilkan dalam Word Cloud.")
|
231 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
232 |
# Kolom kedua untuk Bar Chart
|
233 |
with columns[1]:
|
234 |
-
st.
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
#
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
value_counts = df_results["Hasil Analisis Sentimen"].value_counts()
|
244 |
|
245 |
-
|
246 |
-
|
247 |
|
248 |
-
|
249 |
-
|
250 |
-
plt.xticks(rotation=45)
|
251 |
|
252 |
-
|
|
|
|
|
|
|
253 |
|
254 |
# Menampilkan hasil analisis sentimen dalam kotak yang dapat diperluas
|
255 |
with st.expander("Hasil Analisis Sentimen"):
|
256 |
# Tampilkan tabel hasil analisis sentimen
|
257 |
-
st.write(pd.DataFrame(results
|
258 |
|
259 |
if results:
|
260 |
# Simpan DataFrame ke dalam file CSV
|
261 |
-
df = pd.DataFrame(results
|
262 |
csv = df.to_csv(index=False)
|
263 |
|
264 |
# Tampilkan tombol unduh CSV
|
|
|
78 |
normalized_words = [slang_dict.get(word, word) for word in words]
|
79 |
return ' '.join(normalized_words)
|
80 |
|
81 |
+
#---------------------------------------------------NLTK Remove Stopwords----------------------------------------------------------------------
|
82 |
+
|
83 |
+
# Initialise the Indonesian stopword set.
# The corpus is looked up locally first and only downloaded when NLTK reports
# it missing (LookupError), so the download is not repeated on every run of
# this script.
try:
    stop_words = set(stopwords.words("indonesian"))
except LookupError:
    nltk.download("stopwords", quiet=True)
    stop_words = set(stopwords.words("indonesian"))
|
86 |
+
|
87 |
+
def remove_stopwords(text, stop_words):
    """Return *text* with every token found in *stop_words* removed.

    The text is split on whitespace, tokens contained in ``stop_words`` are
    dropped (the comparison is case-sensitive), and the remaining tokens are
    joined back together with single spaces.

    Args:
        text: The string to filter. Non-string values (for example ``None``
            or the ``NaN`` produced by an empty spreadsheet cell) are treated
            as empty text instead of raising ``AttributeError``.
        stop_words: Container of words to drop.

    Returns:
        The filtered text, or ``""`` when nothing is left.
    """
    if not isinstance(text, str):
        return ""

    return " ".join(word for word in text.split() if word not in stop_words)
|
95 |
+
#---------------------------------------------------TFIDF----------------------------------------------------------------------
|
96 |
# Fungsi untuk ekstraksi fitur TF-IDF
|
97 |
def extract_tfidf_features(texts, tfidf_vectorizer):
|
98 |
tfidf_matrix = tfidf_vectorizer.transform(texts)
|
|
|
102 |
tfidf_model_path = 'X_tfidf_model.joblib'
|
103 |
tfidf_vectorizer = joblib.load(tfidf_model_path)
|
104 |
|
105 |
+
#---------------------------------------------------Milih Model----------------------------------------------------------------------
|
106 |
+
|
107 |
# Fungsi untuk memilih model berdasarkan pilihan pengguna
|
108 |
def select_sentiment_model(selected_model):
|
109 |
if selected_model == "Ensemble":
|
|
|
151 |
|
152 |
return emoticon
|
153 |
|
154 |
+
def buat_chart(df, target_year):
    """Render a stacked monthly bar chart of sentiment labels for one year.

    Args:
        df: DataFrame with a ``label`` column and a date column named either
            ``at`` or ``Date``. The frame is left unmodified.
        target_year: Calendar year whose rows are charted.

    Returns:
        None. Output is written to the Streamlit page; when no row falls in
        ``target_year`` a warning is shown instead of a chart.

    Raises:
        KeyError: If ``df`` has neither an ``at`` nor a ``Date`` column.
    """
    st.write(f"Bar Chart Tahun {target_year}:")

    # The result rows built elsewhere in this script use a 'Date' key, while
    # this function historically read 'at'; accept whichever is present.
    date_column = next((name for name in ("at", "Date") if name in df.columns), None)
    if date_column is None:
        raise KeyError("buat_chart requires an 'at' or 'Date' column")

    timestamps = pd.to_datetime(df[date_column])
    in_target_year = timestamps.dt.year == target_year

    # Work on a copy so the caller's frame is untouched and the column
    # assignment below does not write into a slice of it.
    df_filtered = df.loc[in_target_year].copy()

    # Nothing to draw for the requested year.
    if df_filtered.empty:
        st.warning(f"Tidak ada data untuk tahun {target_year}.")
        return

    # Month labels in calendar order, e.g. "Januari 2022".
    nama_bulan = (
        'Januari', 'Februari', 'Maret', 'April', 'Mei', 'Juni',
        'Juli', 'Agustus', 'September', 'Oktober', 'November', 'Desember',
    )
    months_order = [f'{nama} {target_year}' for nama in nama_bulan]

    # An ordered categorical keeps the months in calendar (not alphabetical)
    # order when grouping.
    month_numbers = timestamps.loc[in_target_year].dt.month
    df_filtered['month'] = pd.Categorical(
        [months_order[int(number) - 1] for number in month_numbers],
        categories=months_order,
        ordered=True,
    )

    # Colour for each sentiment label.
    warna_label = {
        'Negatif': '#FF9AA2',
        'Netral': '#FFDAC1',
        'Positif': '#B5EAD7',
    }

    # Rows = months, columns = labels, values = number of texts.
    counts = (
        df_filtered.groupby(['month', 'label'], observed=False)
        .size()
        .unstack()
        .fillna(0)
    )

    # Derive the colours from the actual chart columns so the list always has
    # the right length and order; unknown labels fall back to a neutral grey.
    st.bar_chart(
        counts,
        color=[warna_label.get(label, '#CCCCCC') for label in counts.columns],
    )
|
215 |
+
|
216 |
# Fungsi untuk membuat tautan unduhan
|
217 |
def get_table_download_link(df, download_format):
|
218 |
if download_format == "XLSX":
|
|
|
225 |
|
226 |
# Judul
|
227 |
st.title("Analisis Sentimen Based on Tweets Biskita Transpakuan")
|
228 |
+
#-----------------------------------------------------General Settings---------------------------------------------------------------
|
229 |
+
# Defaults so these names exist whichever input method is chosen and whether
# or not the uploaded workbook has a 'Date' column.
user_input = ""
uploaded_file = None
dates = None
target_year = None

with st.expander("General Settings :"):
    # Widget for choosing the sentiment model.
    selected_model = st.selectbox("Pilih Model Sentimen:", ("Ensemble", "Naive Bayes", "Logistic Regression", "Transformer"))

    # Resolve the model object for the user's choice.
    sentiment_model = select_sentiment_model(selected_model)

    # Input method: manual text or an XLSX upload.
    input_option = st.radio("Pilih metode input:", ("Teks Manual", "Unggah Berkas XLSX"))

    if input_option == "Teks Manual":
        # Free-text input from the user.
        user_input = st.text_area("Masukkan teks:", "")
    else:
        # XLSX file input.
        uploaded_file = st.file_uploader("Unggah berkas XLSX", type=["xlsx"])
        st.write("**Pastikan berkas XLSX Anda memiliki kolom yang bernama 'Text'.**")

        if uploaded_file is not None:
            df = pd.read_excel(uploaded_file)

            if 'Text' not in df.columns:
                st.warning("Berkas XLSX harus memiliki kolom bernama 'Text' untuk analisis sentimen.")
            else:
                texts = df['Text']  # Must match the column name in the XLSX file
                if 'Date' in df.columns:
                    dates = df['Date']

#-----------------------------------------------------Preference Settings--------------------------------------------------
with st.expander("Preference Settings :"):
    colormap = st.selectbox("Pilih Warna Wordclouds :", ["Greys", "Purples", "Blues", "Greens", "Oranges", "Reds", "YlOrBr", "YlOrRd", "OrRd", "PuRd", "RdPu", "BuPu", "GnBu", "PuBu", "YlGnBu", "PuBuGn", "BuGn", "YlGn"])
    # `dates` is a pandas Series when present; testing its truthiness raises
    # ValueError, so compare against None explicitly.
    if dates is not None:
        target_year = st.selectbox("Pilih Tahun Bar Chart :", (2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025), index=5)
|
261 |
# Analisis sentimen
|
262 |
results = []
|
263 |
analisis = False
|
|
|
269 |
emoticon = get_emoticon(sentiment_label)
|
270 |
cleaned_text = clean_text(text)
|
271 |
norm_slang_text = normalize_slang(cleaned_text, lookp_dict)
|
272 |
+
tanpa_stopwords = remove_stopwords(norm_slang_text, stop_words)
|
273 |
+
|
274 |
+
results_prep.append({
|
275 |
+
'Text': text,
|
276 |
+
'cleaned-text' : cleaned_text,
|
277 |
+
'normalisasi-text' : norm_slang_text,
|
278 |
+
'stopwords-remove' : tanpa_stopwords,
|
279 |
+
'label' : sentiment_label,
|
280 |
+
'emotikon' : emoticon,
|
281 |
+
})
|
282 |
analisis = True
|
283 |
|
284 |
elif input_option == "Unggah Berkas XLSX" and uploaded_file is not None:
|
|
|
288 |
emoticon = get_emoticon(sentiment_label)
|
289 |
cleaned_text = clean_text(text)
|
290 |
norm_slang_text = normalize_slang(cleaned_text, lookp_dict)
|
291 |
+
if dates in df.columns :
|
292 |
+
for date in dates :
|
293 |
+
results_prep.append({
|
294 |
+
'Date' : date,
|
295 |
+
'Text': text,
|
296 |
+
'cleaned-text' : cleaned_text,
|
297 |
+
'normalisasi-text' : norm_slang_text,
|
298 |
+
'stopwords-remove' : tanpa_stopwords,
|
299 |
+
'label' : sentiment_label,
|
300 |
+
'emotikon' : emoticon,
|
301 |
+
})
|
302 |
+
else :
|
303 |
+
results_prep.append({
|
304 |
+
'Text': text,
|
305 |
+
'cleaned-text' : cleaned_text,
|
306 |
+
'normalisasi-text' : norm_slang_text,
|
307 |
+
'stopwords-remove' : tanpa_stopwords,
|
308 |
+
'label' : sentiment_label,
|
309 |
+
'emotikon' : emoticon,
|
310 |
+
})
|
311 |
analisis = True
|
312 |
|
313 |
else:
|
|
|
316 |
|
317 |
st.info('Tekan "Analysis" kembali jika tampilan menghilang', icon = 'ℹ️')
|
318 |
# Render the visual summary once an analysis run has produced results.
if results and analisis:
    df_results = pd.DataFrame(results)

    # Split the view into two columns.
    columns = st.columns(2)

    # First column: word cloud.
    with columns[0]:
        st.write("Wordclouds")
        # Concatenate every non-missing stopword-free text.
        all_texts = " ".join(
            result['stopwords-remove']
            for result in results
            if result.get('stopwords-remove') is not None
            and not pd.isna(result.get('stopwords-remove'))
        )

        # Strip before testing: joining empty strings yields whitespace, and
        # WordCloud.generate raises when it is given no words.
        if all_texts.strip():
            wordcloud = WordCloud(width=800, height=660, background_color='white',
                                  colormap=colormap,       # Font colour map
                                  contour_color='black',   # Contour colour
                                  contour_width=2,         # Contour width
                                  mask=None,               # Mask for a custom shape
                                  ).generate(all_texts)
            st.image(wordcloud.to_array())
        else:
            st.write("Tidak ada data untuk ditampilkan dalam Word Cloud.")

    # Per-month chart, only when the results carry dates.
    if 'Date' in df_results.columns:
        with columns[1]:
            buat_chart(df_results, target_year)

    # Second column: bar chart of label totals.
    with columns[1]:
        st.write("Bar Chart :")
        # Colour for each sentiment label.
        warna_label = {
            'Negatif': '#FF9AA2',
            'Netral': '#FFDAC1',
            'Positif': '#B5EAD7'
        }
        # Class labels in display order.
        class_labels = ["Negatif", "Netral", "Positif"]

        # Count per label, ordered by class and with 0 for absent labels.
        value_counts = df_results["label"].value_counts().reindex(class_labels, fill_value=0)

        # st.bar_chart takes a data object; x/y are column names, not arrays.
        # One column per label (count on the diagonal, 0 elsewhere) lets each
        # bar receive its own colour through the `color` list.
        chart_data = pd.DataFrame(
            {
                label: [int(value_counts[label]) if row == label else 0 for row in class_labels]
                for label in class_labels
            },
            index=class_labels,
        )

        st.bar_chart(
            chart_data,
            color=[warna_label[label] for label in class_labels]
        )
|
365 |
|
366 |
# Menampilkan hasil analisis sentimen dalam kotak yang dapat diperluas
|
367 |
with st.expander("Hasil Analisis Sentimen"):
|
368 |
# Tampilkan tabel hasil analisis sentimen
|
369 |
+
st.write(pd.DataFrame(results))
|
370 |
|
371 |
if results:
|
372 |
# Simpan DataFrame ke dalam file CSV
|
373 |
+
df = pd.DataFrame(results)
|
374 |
csv = df.to_csv(index=False)
|
375 |
|
376 |
# Tampilkan tombol unduh CSV
|