naufalnashif committed on
Commit
9b85aa5
·
1 Parent(s): e5b0dd1

Update app.py

Browse files

update(stopwords) : hapus words dari list stopwords

Files changed (1) hide show
  1. app.py +9 -0
app.py CHANGED
@@ -75,6 +75,15 @@ def load_file(kamus_path, kamus_sendiri_path):
75
  nltk.download("stopwords")
76
  stop_words = set(stopwords.words("indonesian"))
77
 
 
 
 
 
 
 
 
 
 
78
  tfidf_vectorizer = joblib.load(tfidf_model_path)
79
  model_ensemble = joblib.load('ensemble_clf_soft_smote.joblib')
80
  #model_rf
 
75
  nltk.download("stopwords")
76
  stop_words = set(stopwords.words("indonesian"))
77
 
78
+ additional_stopwords = [] # Ganti dengan kata-kata yang ingin Anda tambahkan
79
+ stop_words.update(additional_stopwords)
80
+
81
+ # Hapus beberapa kata dari kamus stopwords agar tidak terhapus pada tweets
82
+ words_to_remove = ['lama', 'datang', 'sekarang', 'amat', 'usah', 'tidak', 'percuma', 'jauh', 'demi', 'apa', 'mana', 'begini', 'waktu', 'kurang', 'ya', 'kah', 'bagaimana', 'gimana','tanya','mau','berapa','jadwal','belum','gamau','dimana','kalo','info','sih','punten','naik','gais','min','ga','bisa','apa', 'enggak' ]
83
+ for word in words_to_remove:
84
+ if word in stop_words:
85
+ stop_words.remove(word)
86
+
87
  tfidf_vectorizer = joblib.load(tfidf_model_path)
88
  model_ensemble = joblib.load('ensemble_clf_soft_smote.joblib')
89
  #model_rf