# Skripsi / app.py
import streamlit as st
import pandas as pd
import joblib
from preprocessing import clean_text, case_folding, tokenize, normalized_term, remove_stopwords, stem_text
# Dictionary mapping each model option to its SVM model, vectorizer, and selected-feature files
models = {
    'Model': {'svm': 'svm_model.pkl', 'vectorizer': 'tfidf_prabowo_final.pkl', 'features': 'prabowo_selected.pkl'},
}
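# Note: additional model configurations could be registered in the same format; the entry
# below is only an illustrative, hypothetical example (these .pkl filenames do not exist here):
# models['Another Model'] = {'svm': 'other_svm.pkl', 'vectorizer': 'other_tfidf.pkl', 'features': 'other_selected.pkl'}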
# Model selection
selected_model = st.sidebar.selectbox('Choose Model:', list(models.keys()))

# Load the model, vectorizer, and selected feature names for the chosen option
model_files = models[selected_model]
model = joblib.load(model_files['svm'])
vectorizer = joblib.load(model_files['vectorizer'])
features = joblib.load(model_files['features'])
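# Assumption based on how these objects are used below: 'features' is expected to hold the list of
# selected TF-IDF feature names, used to subset the vectorized matrix before prediction.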
st.title('Website Prediksi Analisis Sentimen Calon Presiden Indonesia 2024')
# Keyword lists: texts containing any of these words are classified directly as positive/negative
positive_keywords = ['menang', 'bagus', 'keren', 'mantap', 'bijak', 'berhasil']
negative_keywords = ['bangsat', 'anjing', 'hoax', 'tai', 'babi', 'bajingan', 'penipu', 'pembohong', 'telek', 'sialan', 'gagal', 'kalah', 'jelek']
# Check whether the text contains any of the given positive keywords
def contains_positive_keywords(text, keywords):
    for word in keywords:
        if word in text:
            return True
    return False
# Check whether the text contains any of the given negative keywords
def contains_negative_keywords(text, keywords):
    for word in keywords:
        if word in text:
            return True
    return False
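# Illustrative usage (note that these helpers do plain substring matching on the raw text,
# so a keyword such as 'gagal' also matches longer words that contain it):
# contains_positive_keywords('prabowo menang debat', positive_keywords)  # True
# contains_negative_keywords('berita itu hoax', negative_keywords)       # True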
# Input option: free text or an uploaded file
option = st.sidebar.selectbox('Choose Input Option:', ['Text', 'File'])

if option == 'Text':
    # Text input
    user_input = st.text_area('Enter Text:', '')

    # Run the preprocessing pipeline on the input text
    cleaned_text = clean_text(user_input)
    folded_text = case_folding(cleaned_text)
    tokenized_text = tokenize(folded_text)
    normalized_text = normalized_term(tokenized_text)
    wstopword_text = remove_stopwords(normalized_text)
    stemmed_text = stem_text(wstopword_text)
    done_text = ' '.join(stemmed_text)
    if st.button('Analyze'):
        # Show the preprocessing stages
        st.subheader('Preprocessing Steps:')
        st.write(pd.DataFrame({'Step': ['Cleaning', 'Case Folding', 'Tokenization', 'Normalization', 'Stopword Removal', 'Stemming'],
                               'Result': [cleaned_text, folded_text, tokenized_text, normalized_text, wstopword_text, stemmed_text]}))

        # Keyword rules take precedence; otherwise fall back to the SVM prediction
        if contains_positive_keywords(user_input, positive_keywords):
            st.write('Sentiment: Positif')
        elif contains_negative_keywords(user_input, negative_keywords):
            st.write('Sentiment: Negatif')
        else:
            # Vectorize the preprocessed text and keep only the selected TF-IDF features
            new_data_tfidf = vectorizer.transform([done_text]).toarray()
            new_data_selected = pd.DataFrame(new_data_tfidf, columns=vectorizer.get_feature_names_out())[features].values
            prediction = model.predict(new_data_selected)
            st.write('Sentiment:', prediction[0])
elif option == 'File':
    # Excel/CSV file input
    uploaded_file = st.file_uploader('Upload Excel/CSV File:', type=['csv', 'xlsx'])

    if uploaded_file is not None:
        # Read the uploaded file (Excel or CSV)
        df = pd.read_excel(uploaded_file) if uploaded_file.name.endswith('xlsx') else pd.read_csv(uploaded_file)

        # Preprocess the 'text' column (the uploaded file is expected to contain this column)
        df['Cleaned'] = df['text'].apply(clean_text)
        df['Case Folded'] = df['Cleaned'].apply(case_folding)
        df['Tokenized'] = df['Case Folded'].apply(tokenize)
        df['Normalized'] = df['Tokenized'].apply(normalized_term)
        df['Stopword Removal'] = df['Normalized'].apply(remove_stopwords)
        df['Stemmed'] = df['Stopword Removal'].apply(stem_text)
        df['Text Final'] = df['Stemmed'].apply(lambda words: ' '.join(words))

        # Vectorize the preprocessed texts and keep only the features used by the SVM model
        new_data_tfidf = vectorizer.transform(df['Text Final']).toarray()
        new_data_selected = pd.DataFrame(new_data_tfidf, columns=vectorizer.get_feature_names_out())[features].values
        # Classify each row: keyword rules take precedence, otherwise use the SVM prediction
        predictions = []
        for i, text in enumerate(df['text']):
            if contains_positive_keywords(text, positive_keywords):
                predictions.append('Positif')
            elif contains_negative_keywords(text, negative_keywords):
                predictions.append('Negatif')
            else:
                prediction = model.predict(new_data_selected[i].reshape(1, -1))
                predictions.append(prediction[0])
        df['Sentiment'] = predictions
        # Show the distribution of predicted labels
        st.subheader('Sentiment Distribution:')
        st.write(df['Sentiment'].value_counts())

        # Show the preprocessing stages
        st.subheader('Preprocessing Steps:')
        st.write(df)
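# To run the app locally (assuming Streamlit is installed and the .pkl artifacts above
# are present in the working directory):
#   streamlit run app.py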