naufalnashif committed on
Commit
7e56286
·
1 Parent(s): 6f065f8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -12
app.py CHANGED
@@ -57,18 +57,14 @@ def clean_text(text):
57
  text = re.sub(r'(\w)\1{2,}', r'\1', text)
58
 
59
  return text
60
-
61
- # Fungsi untuk normalisasi kata gaul
62
  @st.cache_data
63
- def normalize_slang(text, slang_dict):
64
- # Membaca kamus kata gaul Salsabila
65
- kamus_path = '_json_colloquial-indonesian-lexicon (1).txt' # Ganti dengan path yang benar
66
  with open(kamus_path) as f:
67
  data = f.read()
68
  lookp_dict = json.loads(data)
69
 
70
  # Dict kata gaul saya sendiri yang tidak masuk di dict Salsabila
71
- kamus_sendiri_path = 'kamus_gaul_custom.txt'
72
  with open(kamus_sendiri_path) as f:
73
  kamus_sendiri = f.read()
74
  kamus_gaul_baru = json.loads(kamus_sendiri)
@@ -76,6 +72,17 @@ def normalize_slang(text, slang_dict):
76
  # Menambahkan dict kata gaul baru ke kamus yang sudah ada
77
  lookp_dict.update(kamus_gaul_baru)
78
 
 
 
 
 
 
 
 
 
 
 
 
79
  words = text.split()
80
  normalized_words = [slang_dict.get(word, word) for word in words]
81
  return ' '.join(normalized_words)
@@ -84,8 +91,6 @@ def normalize_slang(text, slang_dict):
84
 
85
  @st.cache_data
86
  def remove_stopwords(text, stop_words):
87
- nltk.download("stopwords")
88
- stop_words = set(stopwords.words("indonesian"))
89
  # Pecah teks menjadi kata-kata
90
  words = text.split()
91
 
@@ -227,6 +232,7 @@ def get_table_download_link(df, download_format):
227
 
228
  # Judul
229
  st.title("Analisis Sentimen Based on Tweets Biskita Transpakuan")
 
230
  #-----------------------------------------------------General Settings---------------------------------------------------------------
231
  with st.expander("General Settings :"):
232
  # Tambahkan widget untuk memilih model
@@ -244,7 +250,7 @@ with st.expander("General Settings :"):
244
  else:
245
  # Input berkas XLSX
246
  uploaded_file = st.file_uploader("Unggah berkas XLSX", type=["xlsx"])
247
- st.write("**Pastikan berkas XLSX Anda memiliki kolom yang bernama 'Text'.**")
248
 
249
  if uploaded_file is not None:
250
  df = pd.read_excel(uploaded_file)
@@ -258,13 +264,14 @@ with st.expander("General Settings :"):
258
  if "Date" in df.columns :
259
  if not df['Date'].empty:
260
  dates = df['Date']
261
- bar = st.selectbox("Pilih Tampilan Bar Chart :", ("Distribusi Kelas", "Distribusi Kelas Berdasarkan Waktu"), index = 0)
262
- target_year = st.selectbox("Pilih Tahun Bar Chart :", df['Date'].str[:4].unique())
263
 
264
  #-----------------------------------------------------Preference Settings--------------------------------------------------
265
  with st.expander ("Preference Settings :"):
266
  colormap = st.selectbox("Pilih Warna Wordclouds :", ["Greys", "Purples", "Blues", "Greens", "Oranges", "Reds", "YlOrBr", "YlOrRd", "OrRd", "PuRd", "RdPu", "BuPu", "GnBu", "PuBu", "YlGnBu", "PuBuGn", "BuGn", "YlGn"])
267
-
 
 
268
  # Analisis sentimen
269
  results = []
270
  analisis = False
 
57
  text = re.sub(r'(\w)\1{2,}', r'\1', text)
58
 
59
  return text
 
 
60
  @st.cache_data
61
+ def load_file(kamus_path, kamus_sendiri_path):
62
+ # Membaca kamus kata gaul Salsabila
 
63
  with open(kamus_path) as f:
64
  data = f.read()
65
  lookp_dict = json.loads(data)
66
 
67
  # Dict kata gaul saya sendiri yang tidak masuk di dict Salsabila
 
68
  with open(kamus_sendiri_path) as f:
69
  kamus_sendiri = f.read()
70
  kamus_gaul_baru = json.loads(kamus_sendiri)
 
72
  # Menambahkan dict kata gaul baru ke kamus yang sudah ada
73
  lookp_dict.update(kamus_gaul_baru)
74
 
75
+ nltk.download("stopwords")
76
+ stop_words = set(stopwords.words("indonesian"))
77
+ return lookp_dict, stop_words
78
+
79
+ kamus_path = '_json_colloquial-indonesian-lexicon (1).txt'
80
+ kamus_sendiri_path = 'kamus_gaul_custom.txt'
81
+ lookp_dict, stop_words = load_file(kamus_path, kamus_sendiri_path)
82
+
83
+ # Fungsi untuk normalisasi kata gaul
84
+ @st.cache_data
85
+ def normalize_slang(text, slang_dict):
86
  words = text.split()
87
  normalized_words = [slang_dict.get(word, word) for word in words]
88
  return ' '.join(normalized_words)
 
91
 
92
  @st.cache_data
93
  def remove_stopwords(text, stop_words):
 
 
94
  # Pecah teks menjadi kata-kata
95
  words = text.split()
96
 
 
232
 
233
  # Judul
234
  st.title("Analisis Sentimen Based on Tweets Biskita Transpakuan")
235
+ preference_barchart_date = False
236
  #-----------------------------------------------------General Settings---------------------------------------------------------------
237
  with st.expander("General Settings :"):
238
  # Tambahkan widget untuk memilih model
 
250
  else:
251
  # Input berkas XLSX
252
  uploaded_file = st.file_uploader("Unggah berkas XLSX", type=["xlsx"])
253
+ st.info("Pastikan berkas XLSX Anda memiliki kolom yang bernama 'Text'.")
254
 
255
  if uploaded_file is not None:
256
  df = pd.read_excel(uploaded_file)
 
264
  if "Date" in df.columns :
265
  if not df['Date'].empty:
266
  dates = df['Date']
267
+ preference_barchart_date = True
 
268
 
269
  #-----------------------------------------------------Preference Settings--------------------------------------------------
270
  with st.expander ("Preference Settings :"):
271
  colormap = st.selectbox("Pilih Warna Wordclouds :", ["Greys", "Purples", "Blues", "Greens", "Oranges", "Reds", "YlOrBr", "YlOrRd", "OrRd", "PuRd", "RdPu", "BuPu", "GnBu", "PuBu", "YlGnBu", "PuBuGn", "BuGn", "YlGn"])
272
+ if preference_barchart_date == True:
273
+ bar = st.selectbox("Pilih Tampilan Bar Chart :", ("Distribusi Kelas", "Distribusi Kelas Berdasarkan Waktu"), index = 0)
274
+ target_year = st.selectbox("Pilih Tahun Bar Chart :", df['Date'].str[:4].unique())
275
  # Analisis sentimen
276
  results = []
277
  analisis = False