Commit
·
7e56286
1
Parent(s):
6f065f8
Update app.py
Browse files
app.py
CHANGED
@@ -57,18 +57,14 @@ def clean_text(text):
|
|
57 |
text = re.sub(r'(\w)\1{2,}', r'\1', text)
|
58 |
|
59 |
return text
|
60 |
-
|
61 |
-
# Fungsi untuk normalisasi kata gaul
|
62 |
@st.cache_data
|
63 |
-
def
|
64 |
-
# Membaca kamus kata gaul Salsabila
|
65 |
-
kamus_path = '_json_colloquial-indonesian-lexicon (1).txt' # Ganti dengan path yang benar
|
66 |
with open(kamus_path) as f:
|
67 |
data = f.read()
|
68 |
lookp_dict = json.loads(data)
|
69 |
|
70 |
# Dict kata gaul saya sendiri yang tidak masuk di dict Salsabila
|
71 |
-
kamus_sendiri_path = 'kamus_gaul_custom.txt'
|
72 |
with open(kamus_sendiri_path) as f:
|
73 |
kamus_sendiri = f.read()
|
74 |
kamus_gaul_baru = json.loads(kamus_sendiri)
|
@@ -76,6 +72,17 @@ def normalize_slang(text, slang_dict):
|
|
76 |
# Menambahkan dict kata gaul baru ke kamus yang sudah ada
|
77 |
lookp_dict.update(kamus_gaul_baru)
|
78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
words = text.split()
|
80 |
normalized_words = [slang_dict.get(word, word) for word in words]
|
81 |
return ' '.join(normalized_words)
|
@@ -84,8 +91,6 @@ def normalize_slang(text, slang_dict):
|
|
84 |
|
85 |
@st.cache_data
|
86 |
def remove_stopwords(text, stop_words):
|
87 |
-
nltk.download("stopwords")
|
88 |
-
stop_words = set(stopwords.words("indonesian"))
|
89 |
# Pecah teks menjadi kata-kata
|
90 |
words = text.split()
|
91 |
|
@@ -227,6 +232,7 @@ def get_table_download_link(df, download_format):
|
|
227 |
|
228 |
# Judul
|
229 |
st.title("Analisis Sentimen Based on Tweets Biskita Transpakuan")
|
|
|
230 |
#-----------------------------------------------------General Settings---------------------------------------------------------------
|
231 |
with st.expander("General Settings :"):
|
232 |
# Tambahkan widget untuk memilih model
|
@@ -244,7 +250,7 @@ with st.expander("General Settings :"):
|
|
244 |
else:
|
245 |
# Input berkas XLSX
|
246 |
uploaded_file = st.file_uploader("Unggah berkas XLSX", type=["xlsx"])
|
247 |
-
st.
|
248 |
|
249 |
if uploaded_file is not None:
|
250 |
df = pd.read_excel(uploaded_file)
|
@@ -258,13 +264,14 @@ with st.expander("General Settings :"):
|
|
258 |
if "Date" in df.columns :
|
259 |
if not df['Date'].empty:
|
260 |
dates = df['Date']
|
261 |
-
|
262 |
-
target_year = st.selectbox("Pilih Tahun Bar Chart :", df['Date'].str[:4].unique())
|
263 |
|
264 |
#-----------------------------------------------------Preference Settings--------------------------------------------------
|
265 |
with st.expander ("Preference Settings :"):
|
266 |
colormap = st.selectbox("Pilih Warna Wordclouds :", ["Greys", "Purples", "Blues", "Greens", "Oranges", "Reds", "YlOrBr", "YlOrRd", "OrRd", "PuRd", "RdPu", "BuPu", "GnBu", "PuBu", "YlGnBu", "PuBuGn", "BuGn", "YlGn"])
|
267 |
-
|
|
|
|
|
268 |
# Analisis sentimen
|
269 |
results = []
|
270 |
analisis = False
|
|
|
57 |
text = re.sub(r'(\w)\1{2,}', r'\1', text)
|
58 |
|
59 |
return text
|
|
|
|
|
60 |
@st.cache_data
|
61 |
+
def load_file(kamus_path, kamus_sendiri_path):
|
62 |
+
# Membaca kamus kata gaul Salsabila
|
|
|
63 |
with open(kamus_path) as f:
|
64 |
data = f.read()
|
65 |
lookp_dict = json.loads(data)
|
66 |
|
67 |
# Dict kata gaul saya sendiri yang tidak masuk di dict Salsabila
|
|
|
68 |
with open(kamus_sendiri_path) as f:
|
69 |
kamus_sendiri = f.read()
|
70 |
kamus_gaul_baru = json.loads(kamus_sendiri)
|
|
|
72 |
# Menambahkan dict kata gaul baru ke kamus yang sudah ada
|
73 |
lookp_dict.update(kamus_gaul_baru)
|
74 |
|
75 |
+
nltk.download("stopwords")
|
76 |
+
stop_words = set(stopwords.words("indonesian"))
|
77 |
+
return lookp_dict, stop_words
|
78 |
+
|
79 |
+
kamus_path = '_json_colloquial-indonesian-lexicon (1).txt'
|
80 |
+
kamus_sendiri_path = 'kamus_gaul_custom.txt'
|
81 |
+
lookp_dict, stop_words = load_file(kamus_path, kamus_sendiri_path)
|
82 |
+
|
83 |
+
# Fungsi untuk normalisasi kata gaul
|
84 |
+
@st.cache_data
|
85 |
+
def normalize_slang(text, slang_dict):
|
86 |
words = text.split()
|
87 |
normalized_words = [slang_dict.get(word, word) for word in words]
|
88 |
return ' '.join(normalized_words)
|
|
|
91 |
|
92 |
@st.cache_data
|
93 |
def remove_stopwords(text, stop_words):
|
|
|
|
|
94 |
# Pecah teks menjadi kata-kata
|
95 |
words = text.split()
|
96 |
|
|
|
232 |
|
233 |
# Judul
|
234 |
st.title("Analisis Sentimen Based on Tweets Biskita Transpakuan")
|
235 |
+
preference_barchart_date = False
|
236 |
#-----------------------------------------------------General Settings---------------------------------------------------------------
|
237 |
with st.expander("General Settings :"):
|
238 |
# Tambahkan widget untuk memilih model
|
|
|
250 |
else:
|
251 |
# Input berkas XLSX
|
252 |
uploaded_file = st.file_uploader("Unggah berkas XLSX", type=["xlsx"])
|
253 |
+
st.info("Pastikan berkas XLSX Anda memiliki kolom yang bernama 'Text'.")
|
254 |
|
255 |
if uploaded_file is not None:
|
256 |
df = pd.read_excel(uploaded_file)
|
|
|
264 |
if "Date" in df.columns :
|
265 |
if not df['Date'].empty:
|
266 |
dates = df['Date']
|
267 |
+
preference_barchart_date = True
|
|
|
268 |
|
269 |
#-----------------------------------------------------Preference Settings--------------------------------------------------
|
270 |
with st.expander ("Preference Settings :"):
|
271 |
colormap = st.selectbox("Pilih Warna Wordclouds :", ["Greys", "Purples", "Blues", "Greens", "Oranges", "Reds", "YlOrBr", "YlOrRd", "OrRd", "PuRd", "RdPu", "BuPu", "GnBu", "PuBu", "YlGnBu", "PuBuGn", "BuGn", "YlGn"])
|
272 |
+
if preference_barchart_date == True:
|
273 |
+
bar = st.selectbox("Pilih Tampilan Bar Chart :", ("Distribusi Kelas", "Distribusi Kelas Berdasarkan Waktu"), index = 0)
|
274 |
+
target_year = st.selectbox("Pilih Tahun Bar Chart :", df['Date'].str[:4].unique())
|
275 |
# Analisis sentimen
|
276 |
results = []
|
277 |
analisis = False
|