Spaces:
Sleeping
Sleeping
Upload 9 files
Browse files- Indonesia_stopwords.txt +778 -0
- README.md +5 -5
- app.py +56 -0
- eda.py +40 -0
- gitattributes +35 -0
- hard_voting_classifier.pkl +3 -0
- prediction.py +37 -0
- requirements.txt +12 -0
- vectorizer.pkl +3 -0
Indonesia_stopwords.txt
ADDED
@@ -0,0 +1,778 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ada
|
2 |
+
adalah
|
3 |
+
adanya
|
4 |
+
adapun
|
5 |
+
agak
|
6 |
+
agaknya
|
7 |
+
agar
|
8 |
+
akan
|
9 |
+
akankah
|
10 |
+
akhir
|
11 |
+
akhiri
|
12 |
+
akhirnya
|
13 |
+
aku
|
14 |
+
akulah
|
15 |
+
amat
|
16 |
+
amatlah
|
17 |
+
anda
|
18 |
+
andalah
|
19 |
+
antar
|
20 |
+
antara
|
21 |
+
antaranya
|
22 |
+
apa
|
23 |
+
apaan
|
24 |
+
apabila
|
25 |
+
apakah
|
26 |
+
apalagi
|
27 |
+
apatah
|
28 |
+
artinya
|
29 |
+
asal
|
30 |
+
asalkan
|
31 |
+
atas
|
32 |
+
atau
|
33 |
+
ataukah
|
34 |
+
ataupun
|
35 |
+
awal
|
36 |
+
awalnya
|
37 |
+
bagai
|
38 |
+
bagaikan
|
39 |
+
bagaimana
|
40 |
+
bagaimanakah
|
41 |
+
bagaimanapun
|
42 |
+
bagi
|
43 |
+
bagian
|
44 |
+
bahkan
|
45 |
+
bahwa
|
46 |
+
bahwasanya
|
47 |
+
baik
|
48 |
+
bakal
|
49 |
+
bakalan
|
50 |
+
balik
|
51 |
+
banyak
|
52 |
+
bapak
|
53 |
+
baru
|
54 |
+
bawah
|
55 |
+
beberapa
|
56 |
+
begini
|
57 |
+
beginian
|
58 |
+
beginikah
|
59 |
+
beginilah
|
60 |
+
begitu
|
61 |
+
begitukah
|
62 |
+
begitulah
|
63 |
+
begitupun
|
64 |
+
bekerja
|
65 |
+
belakang
|
66 |
+
belakangan
|
67 |
+
belum
|
68 |
+
belumlah
|
69 |
+
benar
|
70 |
+
benarkah
|
71 |
+
benarlah
|
72 |
+
berada
|
73 |
+
berakhir
|
74 |
+
berakhirlah
|
75 |
+
berakhirnya
|
76 |
+
berapa
|
77 |
+
berapakah
|
78 |
+
berapalah
|
79 |
+
berapapun
|
80 |
+
berarti
|
81 |
+
berawal
|
82 |
+
berbagai
|
83 |
+
berdatangan
|
84 |
+
beri
|
85 |
+
berikan
|
86 |
+
berikut
|
87 |
+
berikutnya
|
88 |
+
berjumlah
|
89 |
+
berkali-kali
|
90 |
+
berkata
|
91 |
+
berkehendak
|
92 |
+
berkeinginan
|
93 |
+
berkenaan
|
94 |
+
berlainan
|
95 |
+
berlalu
|
96 |
+
berlangsung
|
97 |
+
berlebihan
|
98 |
+
bermacam
|
99 |
+
bermacam-macam
|
100 |
+
bermaksud
|
101 |
+
bermula
|
102 |
+
bersama
|
103 |
+
bersama-sama
|
104 |
+
bersiap
|
105 |
+
bersiap-siap
|
106 |
+
bertanya
|
107 |
+
bertanya-tanya
|
108 |
+
berturut
|
109 |
+
berturut-turut
|
110 |
+
bertutur
|
111 |
+
berujar
|
112 |
+
berupa
|
113 |
+
besar
|
114 |
+
betul
|
115 |
+
betulkah
|
116 |
+
biasa
|
117 |
+
biasanya
|
118 |
+
bila
|
119 |
+
bilakah
|
120 |
+
bisa
|
121 |
+
bisakah
|
122 |
+
boleh
|
123 |
+
bolehkah
|
124 |
+
bolehlah
|
125 |
+
buat
|
126 |
+
bukan
|
127 |
+
bukankah
|
128 |
+
bukanlah
|
129 |
+
bukannya
|
130 |
+
bulan
|
131 |
+
bung
|
132 |
+
cara
|
133 |
+
caranya
|
134 |
+
cukup
|
135 |
+
cukupkah
|
136 |
+
cukuplah
|
137 |
+
cuma
|
138 |
+
dahulu
|
139 |
+
dalam
|
140 |
+
dan
|
141 |
+
dapat
|
142 |
+
dari
|
143 |
+
daripada
|
144 |
+
datang
|
145 |
+
dekat
|
146 |
+
demi
|
147 |
+
demikian
|
148 |
+
demikianlah
|
149 |
+
dengan
|
150 |
+
depan
|
151 |
+
di
|
152 |
+
dia
|
153 |
+
diakhiri
|
154 |
+
diakhirinya
|
155 |
+
dialah
|
156 |
+
diantara
|
157 |
+
diantaranya
|
158 |
+
diberi
|
159 |
+
diberikan
|
160 |
+
diberikannya
|
161 |
+
dibuat
|
162 |
+
dibuatnya
|
163 |
+
didapat
|
164 |
+
didatangkan
|
165 |
+
digunakan
|
166 |
+
diibaratkan
|
167 |
+
diibaratkannya
|
168 |
+
diingat
|
169 |
+
diingatkan
|
170 |
+
diinginkan
|
171 |
+
dijawab
|
172 |
+
dijelaskan
|
173 |
+
dijelaskannya
|
174 |
+
dikarenakan
|
175 |
+
dikatakan
|
176 |
+
dikatakannya
|
177 |
+
dikerjakan
|
178 |
+
diketahui
|
179 |
+
diketahuinya
|
180 |
+
dikira
|
181 |
+
dilakukan
|
182 |
+
dilalui
|
183 |
+
dilihat
|
184 |
+
dimaksud
|
185 |
+
dimaksudkan
|
186 |
+
dimaksudkannya
|
187 |
+
dimaksudnya
|
188 |
+
diminta
|
189 |
+
dimintai
|
190 |
+
dimisalkan
|
191 |
+
dimulai
|
192 |
+
dimulailah
|
193 |
+
dimulainya
|
194 |
+
dimungkinkan
|
195 |
+
dini
|
196 |
+
dipastikan
|
197 |
+
diperbuat
|
198 |
+
diperbuatnya
|
199 |
+
dipergunakan
|
200 |
+
diperkirakan
|
201 |
+
diperlihatkan
|
202 |
+
diperlukan
|
203 |
+
diperlukannya
|
204 |
+
dipersoalkan
|
205 |
+
dipertanyakan
|
206 |
+
dipunyai
|
207 |
+
diri
|
208 |
+
dirinya
|
209 |
+
disampaikan
|
210 |
+
disebut
|
211 |
+
disebutkan
|
212 |
+
disebutkannya
|
213 |
+
disini
|
214 |
+
disinilah
|
215 |
+
ditambahkan
|
216 |
+
ditandaskan
|
217 |
+
ditanya
|
218 |
+
ditanyai
|
219 |
+
ditanyakan
|
220 |
+
ditegaskan
|
221 |
+
ditujukan
|
222 |
+
ditunjuk
|
223 |
+
ditunjuki
|
224 |
+
ditunjukkan
|
225 |
+
ditunjukkannya
|
226 |
+
ditunjuknya
|
227 |
+
dituturkan
|
228 |
+
dituturkannya
|
229 |
+
diucapkan
|
230 |
+
diucapkannya
|
231 |
+
diungkapkan
|
232 |
+
dong
|
233 |
+
dua
|
234 |
+
dulu
|
235 |
+
empat
|
236 |
+
enggak
|
237 |
+
enggaknya
|
238 |
+
entah
|
239 |
+
entahlah
|
240 |
+
guna
|
241 |
+
gunakan
|
242 |
+
hal
|
243 |
+
hampir
|
244 |
+
hanya
|
245 |
+
hanyalah
|
246 |
+
hari
|
247 |
+
harus
|
248 |
+
haruslah
|
249 |
+
harusnya
|
250 |
+
hendak
|
251 |
+
hendaklah
|
252 |
+
hendaknya
|
253 |
+
hingga
|
254 |
+
ia
|
255 |
+
ialah
|
256 |
+
ibarat
|
257 |
+
ibaratkan
|
258 |
+
ibaratnya
|
259 |
+
ibu
|
260 |
+
ikut
|
261 |
+
ingat
|
262 |
+
ingat-ingat
|
263 |
+
ingin
|
264 |
+
inginkah
|
265 |
+
inginkan
|
266 |
+
ini
|
267 |
+
inikah
|
268 |
+
inilah
|
269 |
+
itu
|
270 |
+
itukah
|
271 |
+
itulah
|
272 |
+
jadi
|
273 |
+
jadilah
|
274 |
+
jadinya
|
275 |
+
jangan
|
276 |
+
jangankan
|
277 |
+
janganlah
|
278 |
+
jauh
|
279 |
+
jawab
|
280 |
+
jawaban
|
281 |
+
jawabnya
|
282 |
+
jelas
|
283 |
+
jelaskan
|
284 |
+
jelaslah
|
285 |
+
jelasnya
|
286 |
+
jika
|
287 |
+
jikalau
|
288 |
+
juga
|
289 |
+
jumlah
|
290 |
+
jumlahnya
|
291 |
+
justru
|
292 |
+
kala
|
293 |
+
kalau
|
294 |
+
kalaulah
|
295 |
+
kalaupun
|
296 |
+
kalian
|
297 |
+
kami
|
298 |
+
kamilah
|
299 |
+
kamu
|
300 |
+
kamulah
|
301 |
+
kan
|
302 |
+
kapan
|
303 |
+
kapankah
|
304 |
+
kapanpun
|
305 |
+
karena
|
306 |
+
karenanya
|
307 |
+
kasus
|
308 |
+
kata
|
309 |
+
katakan
|
310 |
+
katakanlah
|
311 |
+
katanya
|
312 |
+
ke
|
313 |
+
keadaan
|
314 |
+
kebetulan
|
315 |
+
kecil
|
316 |
+
kedua
|
317 |
+
keduanya
|
318 |
+
keinginan
|
319 |
+
kelamaan
|
320 |
+
kelihatan
|
321 |
+
kelihatannya
|
322 |
+
kelima
|
323 |
+
keluar
|
324 |
+
kembali
|
325 |
+
kemudian
|
326 |
+
kemungkinan
|
327 |
+
kemungkinannya
|
328 |
+
kenapa
|
329 |
+
kepada
|
330 |
+
kepadanya
|
331 |
+
kesampaian
|
332 |
+
keseluruhan
|
333 |
+
keseluruhannya
|
334 |
+
keterlaluan
|
335 |
+
ketika
|
336 |
+
khususnya
|
337 |
+
kini
|
338 |
+
kinilah
|
339 |
+
kira
|
340 |
+
kira-kira
|
341 |
+
kiranya
|
342 |
+
kita
|
343 |
+
kitalah
|
344 |
+
kok
|
345 |
+
kurang
|
346 |
+
lagi
|
347 |
+
lagian
|
348 |
+
lah
|
349 |
+
lain
|
350 |
+
lainnya
|
351 |
+
lalu
|
352 |
+
lama
|
353 |
+
lamanya
|
354 |
+
lanjut
|
355 |
+
lanjutnya
|
356 |
+
lebih
|
357 |
+
lewat
|
358 |
+
lima
|
359 |
+
luar
|
360 |
+
macam
|
361 |
+
maka
|
362 |
+
makanya
|
363 |
+
makin
|
364 |
+
malah
|
365 |
+
malahan
|
366 |
+
mampu
|
367 |
+
mampukah
|
368 |
+
mana
|
369 |
+
manakala
|
370 |
+
manalagi
|
371 |
+
masa
|
372 |
+
masalah
|
373 |
+
masalahnya
|
374 |
+
masih
|
375 |
+
masihkah
|
376 |
+
masing
|
377 |
+
masing-masing
|
378 |
+
mau
|
379 |
+
maupun
|
380 |
+
melainkan
|
381 |
+
melakukan
|
382 |
+
melalui
|
383 |
+
melihat
|
384 |
+
melihatnya
|
385 |
+
memang
|
386 |
+
memastikan
|
387 |
+
memberi
|
388 |
+
memberikan
|
389 |
+
membuat
|
390 |
+
memerlukan
|
391 |
+
memihak
|
392 |
+
meminta
|
393 |
+
memintakan
|
394 |
+
memisalkan
|
395 |
+
memperbuat
|
396 |
+
mempergunakan
|
397 |
+
memperkirakan
|
398 |
+
memperlihatkan
|
399 |
+
mempersiapkan
|
400 |
+
mempersoalkan
|
401 |
+
mempertanyakan
|
402 |
+
mempunyai
|
403 |
+
memulai
|
404 |
+
memungkinkan
|
405 |
+
menaiki
|
406 |
+
menambahkan
|
407 |
+
menandaskan
|
408 |
+
menanti
|
409 |
+
menanti-nanti
|
410 |
+
menantikan
|
411 |
+
menanya
|
412 |
+
menanyai
|
413 |
+
menanyakan
|
414 |
+
mendapat
|
415 |
+
mendapatkan
|
416 |
+
mendatang
|
417 |
+
mendatangi
|
418 |
+
mendatangkan
|
419 |
+
menegaskan
|
420 |
+
mengakhiri
|
421 |
+
mengapa
|
422 |
+
mengatakan
|
423 |
+
mengatakannya
|
424 |
+
mengenai
|
425 |
+
mengerjakan
|
426 |
+
mengetahui
|
427 |
+
menggunakan
|
428 |
+
menghendaki
|
429 |
+
mengibaratkan
|
430 |
+
mengibaratkannya
|
431 |
+
mengingat
|
432 |
+
mengingatkan
|
433 |
+
menginginkan
|
434 |
+
mengira
|
435 |
+
mengucapkan
|
436 |
+
mengucapkannya
|
437 |
+
mengungkapkan
|
438 |
+
menjadi
|
439 |
+
menjawab
|
440 |
+
menjelaskan
|
441 |
+
menuju
|
442 |
+
menunjuk
|
443 |
+
menunjuki
|
444 |
+
menunjukkan
|
445 |
+
menunjuknya
|
446 |
+
menurut
|
447 |
+
menuturkan
|
448 |
+
menyampaikan
|
449 |
+
menyangkut
|
450 |
+
menyatakan
|
451 |
+
menyebutkan
|
452 |
+
menyeluruh
|
453 |
+
menyiapkan
|
454 |
+
merasa
|
455 |
+
mereka
|
456 |
+
merekalah
|
457 |
+
merupakan
|
458 |
+
meski
|
459 |
+
meskipun
|
460 |
+
meyakini
|
461 |
+
meyakinkan
|
462 |
+
minta
|
463 |
+
mirip
|
464 |
+
misal
|
465 |
+
misalkan
|
466 |
+
misalnya
|
467 |
+
mula
|
468 |
+
mulai
|
469 |
+
mulailah
|
470 |
+
mulanya
|
471 |
+
mungkin
|
472 |
+
mungkinkah
|
473 |
+
nah
|
474 |
+
naik
|
475 |
+
namun
|
476 |
+
nanti
|
477 |
+
nantinya
|
478 |
+
nyaris
|
479 |
+
nyatanya
|
480 |
+
oleh
|
481 |
+
olehnya
|
482 |
+
pada
|
483 |
+
padahal
|
484 |
+
padanya
|
485 |
+
pak
|
486 |
+
paling
|
487 |
+
panjang
|
488 |
+
pantas
|
489 |
+
para
|
490 |
+
pasti
|
491 |
+
pastilah
|
492 |
+
penting
|
493 |
+
pentingnya
|
494 |
+
per
|
495 |
+
percuma
|
496 |
+
perlu
|
497 |
+
perlukah
|
498 |
+
perlunya
|
499 |
+
pernah
|
500 |
+
persoalan
|
501 |
+
pertama
|
502 |
+
pertama-tama
|
503 |
+
pertanyaan
|
504 |
+
pertanyakan
|
505 |
+
pihak
|
506 |
+
pihaknya
|
507 |
+
pukul
|
508 |
+
pula
|
509 |
+
pun
|
510 |
+
punya
|
511 |
+
rasa
|
512 |
+
rasanya
|
513 |
+
rata
|
514 |
+
rupanya
|
515 |
+
saat
|
516 |
+
saatnya
|
517 |
+
saja
|
518 |
+
sajalah
|
519 |
+
saling
|
520 |
+
sama
|
521 |
+
sama-sama
|
522 |
+
sambil
|
523 |
+
sampai
|
524 |
+
sampai-sampai
|
525 |
+
sampaikan
|
526 |
+
sana
|
527 |
+
sangat
|
528 |
+
sangatlah
|
529 |
+
satu
|
530 |
+
saya
|
531 |
+
sayalah
|
532 |
+
se
|
533 |
+
sebab
|
534 |
+
sebabnya
|
535 |
+
sebagai
|
536 |
+
sebagaimana
|
537 |
+
sebagainya
|
538 |
+
sebagian
|
539 |
+
sebaik
|
540 |
+
sebaik-baiknya
|
541 |
+
sebaiknya
|
542 |
+
sebaliknya
|
543 |
+
sebanyak
|
544 |
+
sebegini
|
545 |
+
sebegitu
|
546 |
+
sebelum
|
547 |
+
sebelumnya
|
548 |
+
sebenarnya
|
549 |
+
seberapa
|
550 |
+
sebesar
|
551 |
+
sebetulnya
|
552 |
+
sebisanya
|
553 |
+
sebuah
|
554 |
+
sebut
|
555 |
+
sebutlah
|
556 |
+
sebutnya
|
557 |
+
secara
|
558 |
+
secukupnya
|
559 |
+
sedang
|
560 |
+
sedangkan
|
561 |
+
sedemikian
|
562 |
+
sedikit
|
563 |
+
sedikitnya
|
564 |
+
seenaknya
|
565 |
+
segala
|
566 |
+
segalanya
|
567 |
+
segera
|
568 |
+
seharusnya
|
569 |
+
sehingga
|
570 |
+
seingat
|
571 |
+
sejak
|
572 |
+
sejauh
|
573 |
+
sejenak
|
574 |
+
sejumlah
|
575 |
+
sekadar
|
576 |
+
sekadarnya
|
577 |
+
sekali
|
578 |
+
sekali-kali
|
579 |
+
sekalian
|
580 |
+
sekaligus
|
581 |
+
sekalipun
|
582 |
+
sekarang
|
583 |
+
sekarang
|
584 |
+
sekecil
|
585 |
+
seketika
|
586 |
+
sekiranya
|
587 |
+
sekitar
|
588 |
+
sekitarnya
|
589 |
+
sekurang-kurangnya
|
590 |
+
sekurangnya
|
591 |
+
sela
|
592 |
+
selain
|
593 |
+
selaku
|
594 |
+
selalu
|
595 |
+
selama
|
596 |
+
selama-lamanya
|
597 |
+
selamanya
|
598 |
+
selanjutnya
|
599 |
+
seluruh
|
600 |
+
seluruhnya
|
601 |
+
semacam
|
602 |
+
semakin
|
603 |
+
semampu
|
604 |
+
semampunya
|
605 |
+
semasa
|
606 |
+
semasih
|
607 |
+
semata
|
608 |
+
semata-mata
|
609 |
+
semaunya
|
610 |
+
sementara
|
611 |
+
semisal
|
612 |
+
semisalnya
|
613 |
+
sempat
|
614 |
+
semua
|
615 |
+
semuanya
|
616 |
+
semula
|
617 |
+
sendiri
|
618 |
+
sendirian
|
619 |
+
sendirinya
|
620 |
+
seolah
|
621 |
+
seolah-olah
|
622 |
+
seorang
|
623 |
+
sepanjang
|
624 |
+
sepantasnya
|
625 |
+
sepantasnyalah
|
626 |
+
seperlunya
|
627 |
+
seperti
|
628 |
+
sepertinya
|
629 |
+
sepihak
|
630 |
+
sering
|
631 |
+
seringnya
|
632 |
+
serta
|
633 |
+
serupa
|
634 |
+
sesaat
|
635 |
+
sesama
|
636 |
+
sesampai
|
637 |
+
sesegera
|
638 |
+
sesekali
|
639 |
+
seseorang
|
640 |
+
sesuatu
|
641 |
+
sesuatunya
|
642 |
+
sesudah
|
643 |
+
sesudahnya
|
644 |
+
setelah
|
645 |
+
setempat
|
646 |
+
setengah
|
647 |
+
seterusnya
|
648 |
+
setiap
|
649 |
+
setiba
|
650 |
+
setibanya
|
651 |
+
setidak-tidaknya
|
652 |
+
setidaknya
|
653 |
+
setinggi
|
654 |
+
seusai
|
655 |
+
sewaktu
|
656 |
+
siap
|
657 |
+
siapa
|
658 |
+
siapakah
|
659 |
+
siapapun
|
660 |
+
sini
|
661 |
+
sinilah
|
662 |
+
soal
|
663 |
+
soalnya
|
664 |
+
suatu
|
665 |
+
sudah
|
666 |
+
sudahkah
|
667 |
+
sudahlah
|
668 |
+
supaya
|
669 |
+
tadi
|
670 |
+
tadinya
|
671 |
+
tahu
|
672 |
+
tahun
|
673 |
+
tak
|
674 |
+
tambah
|
675 |
+
tambahnya
|
676 |
+
tampak
|
677 |
+
tampaknya
|
678 |
+
tandas
|
679 |
+
tandasnya
|
680 |
+
tanpa
|
681 |
+
tanya
|
682 |
+
tanyakan
|
683 |
+
tanyanya
|
684 |
+
tapi
|
685 |
+
tegas
|
686 |
+
tegasnya
|
687 |
+
telah
|
688 |
+
tempat
|
689 |
+
tengah
|
690 |
+
tentang
|
691 |
+
tentu
|
692 |
+
tentulah
|
693 |
+
tentunya
|
694 |
+
tepat
|
695 |
+
terakhir
|
696 |
+
terasa
|
697 |
+
terbanyak
|
698 |
+
terdahulu
|
699 |
+
terdapat
|
700 |
+
terdiri
|
701 |
+
terhadap
|
702 |
+
terhadapnya
|
703 |
+
teringat
|
704 |
+
teringat-ingat
|
705 |
+
terjadi
|
706 |
+
terjadilah
|
707 |
+
terjadinya
|
708 |
+
terkira
|
709 |
+
terlalu
|
710 |
+
terlebih
|
711 |
+
terlihat
|
712 |
+
termasuk
|
713 |
+
ternyata
|
714 |
+
tersampaikan
|
715 |
+
tersebut
|
716 |
+
tersebutlah
|
717 |
+
tertentu
|
718 |
+
tertuju
|
719 |
+
terus
|
720 |
+
terutama
|
721 |
+
tetap
|
722 |
+
tetapi
|
723 |
+
tiap
|
724 |
+
tiba
|
725 |
+
tiba-tiba
|
726 |
+
tidak
|
727 |
+
tidakkah
|
728 |
+
tidaklah
|
729 |
+
tiga
|
730 |
+
tinggi
|
731 |
+
toh
|
732 |
+
tunjuk
|
733 |
+
turut
|
734 |
+
tutur
|
735 |
+
tuturnya
|
736 |
+
ucap
|
737 |
+
ucapnya
|
738 |
+
ujar
|
739 |
+
ujarnya
|
740 |
+
umum
|
741 |
+
umumnya
|
742 |
+
ungkap
|
743 |
+
ungkapnya
|
744 |
+
untuk
|
745 |
+
usah
|
746 |
+
usai
|
747 |
+
waduh
|
748 |
+
wah
|
749 |
+
wahai
|
750 |
+
waktu
|
751 |
+
waktunya
|
752 |
+
walau
|
753 |
+
walaupun
|
754 |
+
wong
|
755 |
+
yaitu
|
756 |
+
yakin
|
757 |
+
yakni
|
758 |
+
yang
|
759 |
+
yg
|
760 |
+
good
|
761 |
+
d
|
762 |
+
ok
|
763 |
+
nya
|
764 |
+
gk
|
765 |
+
ga
|
766 |
+
gak
|
767 |
+
bagus
|
768 |
+
buka
|
769 |
+
aja
|
770 |
+
ya
|
771 |
+
mantap
|
772 |
+
banget
|
773 |
+
pake
|
774 |
+
sih
|
775 |
+
kasih
|
776 |
+
tdk
|
777 |
+
oke
|
778 |
+
Ok
|
README.md
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: streamlit
|
7 |
-
sdk_version: 1.
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
|
|
1 |
---
|
2 |
+
title: Bankapps Sentiment
|
3 |
+
emoji: 🚀
|
4 |
+
colorFrom: yellow
|
5 |
+
colorTo: green
|
6 |
sdk: streamlit
|
7 |
+
sdk_version: 1.26.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
app.py
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import prediction
|
3 |
+
import eda
|
4 |
+
|
5 |
+
# Function to display the sentiment prediction
|
6 |
+
@st.cache_data
def get_prediction(text):
    """Return the sentiment label predicted for ``text``.

    Delegates to ``prediction.predict_sentiment``; the function is wrapped
    with ``st.cache_data``.
    """
    label = prediction.predict_sentiment(text)
    return label
|
9 |
+
|
10 |
+
# Main function for the Streamlit app
|
11 |
+
def main():
    """Render the Streamlit app: title, sidebar menu, and the selected page.

    Pages:
        * "Home" - a short welcome / navigation text.
        * "Sentiment Prediction" - a text area plus a "Predict" button that
          shows the label returned by ``get_prediction``.
        * "Exploratory Data Analysis" - three figures produced by the
          ``eda`` module.
    """
    st.title("Sentiment Analysis App")

    menu = ["Home", "Sentiment Prediction", "Exploratory Data Analysis"]
    choice = st.sidebar.selectbox("Menu", menu)

    if choice == "Home":
        st.write("""
        ## Welcome to the Sentiment Analysis App!
        Navigate to the menu on the left to:
        - Predict the sentiment of a given review text.
        - View exploratory data analysis visuals.
        """)

    elif choice == "Sentiment Prediction":
        st.write("""
        ### Sentiment Prediction
        Enter a review text below to predict its sentiment.
        """)

        # Create a text input widget
        text = st.text_area("Enter the review text:")
        if st.button("Predict"):
            if not text.strip():
                # An empty review carries no signal; ask for input instead of
                # reporting a sentiment for nothing.
                st.warning("Please enter some review text before predicting.")
            else:
                sentiment = get_prediction(text)
                st.success(f"The sentiment of the review is: **{sentiment}**")

    elif choice == "Exploratory Data Analysis":
        st.write("""
        ### Exploratory Data Analysis
        View visualizations derived from the dataset.
        """)

        # Display wordcloud
        st.write("### Word Cloud for Reviews")
        st.pyplot(eda.visualize_wordcloud())

        # Display review lengths distribution
        st.write("### Distribution of Review Lengths")
        st.pyplot(eda.plot_review_lengths())

        # Display rating distribution
        st.write("### Rating Distribution")
        st.pyplot(eda.rating_distribution())


if __name__ == '__main__':
    main()
|
eda.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import seaborn as sns
|
3 |
+
import matplotlib.pyplot as plt
|
4 |
+
from wordcloud import WordCloud
|
5 |
+
|
6 |
+
# Load the dataset internally within the EDA module
|
7 |
+
df = pd.read_csv('Danamon Mobile Banking Reviews(D-Bank Pro).csv')
|
8 |
+
|
9 |
+
def visualize_wordcloud():
    """Generate a word cloud figure for the 'content' column.

    Missing values are dropped and the remaining entries are coerced to
    ``str`` before joining, because ``str.join`` raises ``TypeError`` on
    non-string items such as NaN.

    Returns:
        The matplotlib figure holding the rendered word cloud.
    """
    reviews = df['content'].dropna().astype(str)
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(' '.join(reviews))

    plt.figure(figsize=(10, 7))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.title('Word Cloud for Reviews')
    return plt.gcf()  # Return the current figure
|
18 |
+
|
19 |
+
def plot_review_lengths():
    """Draw a histogram (with KDE overlay) of review lengths in characters.

    Returns:
        The matplotlib figure the histogram was drawn on.
    """
    lengths = df['content'].str.len()

    fig = plt.figure(figsize=(12, 6))
    # With no ``ax`` given, seaborn draws on the current axes of ``fig`` and
    # returns them, so the labels below land on the same axes.
    axes = sns.histplot(lengths, bins=50, kde=True)
    axes.set_title('Distribution of Review Lengths')
    axes.set_xlabel('Review Length (characters)')
    axes.set_ylabel('Number of Reviews')
    return fig
|
29 |
+
|
30 |
+
def rating_distribution():
    """Draw a bar chart of how many reviews carry each 'score' value.

    Returns:
        The matplotlib figure the bar chart was drawn on.
    """
    counts = df['score'].value_counts()

    fig = plt.figure(figsize=(10, 6))
    # With no ``ax`` given, seaborn draws on the current axes of ``fig`` and
    # returns them.
    axes = sns.barplot(x=counts.index, y=counts.values, palette="viridis")
    axes.set_title('Rating Distribution')
    axes.set_xlabel('Rating')
    axes.set_ylabel('Number of Reviews')
    return fig
|
40 |
+
|
gitattributes
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
hard_voting_classifier.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aecacff7473f2639092cde8dc584456bd1e38d0e3a3762ef2dab779e7284fa69
|
3 |
+
size 20860385
|
prediction.py
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import joblib
|
2 |
+
import re
|
3 |
+
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
|
4 |
+
import emoji
|
5 |
+
|
6 |
+
# Load the model and vectorizer
|
7 |
+
# NOTE(review): joblib.load unpickles its input, which can execute arbitrary
# code - only ship artifact files from a trusted source.
model = joblib.load("hard_voting_classifier.pkl")
vectorizer = joblib.load("vectorizer.pkl")

# Load custom stopwords, one per line. The encoding is pinned so the result
# does not depend on the platform default, blank lines are skipped, and the
# words are kept in a frozenset so each membership test is O(1).
with open("Indonesia_stopwords.txt", "r", encoding="utf-8") as f:
    custom_stopwords = frozenset(
        stripped for stripped in map(str.strip, f) if stripped
    )
|
13 |
+
|
14 |
+
def preprocess_data(text):
    """Preprocess the input text.

    Steps, in order:
        1. Lowercase the text.
        2. Replace emojis with their textual names (``emoji.demojize``).
        3. Remove URLs, then digits, then every character that is not an
           ASCII letter, digit, or whitespace.
        4. Split on whitespace and drop tokens found in ``custom_stopwords``.
        5. Stem each remaining token.

    Args:
        text: The raw review text.

    Returns:
        The processed tokens joined by single spaces.
    """
    # Case Folding
    text = text.lower()

    # Sentence Normalization
    text = emoji.demojize(text)  # Translate emojis to their word representation
    text = re.sub(r'http[s]?://\S+', '', text)  # Remove URLs
    text = re.sub(r'\d+', '', text)  # Remove numbers
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)  # Remove non-alphanumeric characters except for spaces

    # Tokenization, stopword removal (on the unstemmed token), then stemming
    stemmer = _get_stemmer()
    return ' '.join(
        stemmer.stem(word) for word in text.split() if word not in custom_stopwords
    )


def _get_stemmer():
    """Return a shared stemmer, building it on first use only."""
    stemmer = getattr(_get_stemmer, "_cached", None)
    if stemmer is None:
        # Building the stemmer once avoids redoing the factory setup on
        # every prediction.
        stemmer = StemmerFactory().create_stemmer()
        _get_stemmer._cached = stemmer
    return stemmer
|
31 |
+
|
32 |
+
def predict_sentiment(text):
    """Classify ``text`` and return ``"Positive"`` or ``"Negative"``.

    The text is preprocessed, vectorized with the loaded vectorizer, and
    passed to the loaded model; a predicted label equal to 1 maps to
    "Positive", anything else to "Negative".
    """
    features = vectorizer.transform([preprocess_data(text)])
    label = model.predict(features)[0]
    if label == 1:
        return "Positive"
    return "Negative"
|
requirements.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
pandas
|
3 |
+
matplotlib
|
4 |
+
seaborn
|
5 |
+
wordcloud
|
6 |
+
joblib
|
7 |
+
scikit-learn==1.2.2
|
8 |
+
emoji
|
9 |
+
nltk
|
10 |
+
xgboost
|
11 |
+
pyLDAvis
|
12 |
+
Sastrawi
|
vectorizer.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b4a2ddc8daea502bc7e0dc6dd0b40ca71f93696f063c98a53ee70e60d3de032
|
3 |
+
size 245959
|