Commit
·
9b85aa5
1
Parent(s):
e5b0dd1
Update app.py
Browse files
update(stopwords) : hapus words dari list stopwords
app.py
CHANGED
@@ -75,6 +75,15 @@ def load_file(kamus_path, kamus_sendiri_path):
|
|
75 |
nltk.download("stopwords")
|
76 |
stop_words = set(stopwords.words("indonesian"))
|
77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
tfidf_vectorizer = joblib.load(tfidf_model_path)
|
79 |
model_ensemble = joblib.load('ensemble_clf_soft_smote.joblib')
|
80 |
#model_rf
|
|
|
75 |
nltk.download("stopwords")
|
76 |
stop_words = set(stopwords.words("indonesian"))
|
77 |
|
78 |
+
additional_stopwords = [] # Ganti dengan kata-kata yang ingin Anda tambahkan
|
79 |
+
stop_words.update(additional_stopwords)
|
80 |
+
|
81 |
+
# Hapus beberapa kata dari kamus stopwords agar tidak terhapus pada tweets
|
82 |
+
words_to_remove = ['lama', 'datang', 'sekarang', 'amat', 'usah', 'tidak', 'percuma', 'jauh', 'demi', 'apa', 'mana', 'begini', 'waktu', 'kurang', 'ya', 'kah', 'bagaimana', 'gimana','tanya','mau','berapa','jadwal','belum','gamau','dimana','kalo','info','sih','punten','naik','gais','min','ga','bisa','apa', 'enggak' ]
|
83 |
+
for word in words_to_remove:
|
84 |
+
if word in stop_words:
|
85 |
+
stop_words.remove(word)
|
86 |
+
|
87 |
tfidf_vectorizer = joblib.load(tfidf_model_path)
|
88 |
model_ensemble = joblib.load('ensemble_clf_soft_smote.joblib')
|
89 |
#model_rf
|