"""Streamlit app that classifies the sentiment of Indonesian text.

The text entered by the user is normalised (case folding, mention/hashtag/URL
removal, tokenisation, stopword removal, stemming) and then passed to a
pre-trained Keras model loaded from ``best_model.H5``.
"""

import streamlit as st
import pandas as pd
from keras.models import load_model
import numpy as np
import re
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
import nltk
from nltk.tokenize import word_tokenize
import tensorflow as tf

# Streamlit expects the page configuration to be set before other st.* calls.
st.set_page_config(
    page_title="Indonesian Sentiment Analysis")

# NOTE(review): Streamlit re-executes the script on every interaction, so the
# CSV download, model load and NLTK download below are repeated on each rerun.
# Consider the Streamlit caching decorators if the installed version has them.
stw = pd.read_csv('https://raw.githubusercontent.com/rizalespe/Dataset-Sentimen-Analisis-Bahasa-Indonesia/master/stopword_tweet_pilkada_DKI_2017.csv')
stpwds_id = stw.values.tolist()
# NOTE(review): presumably 'ada' is re-added because read_csv consumes the
# first CSV row as the header -- confirm against the file.
stpwds_id.append(['ada'])

# `stpwds_id` is a list of row-lists (e.g. [['ada'], ...]), so testing a plain
# string token with `in stpwds_id` never matches. Flatten the rows into a set
# of strings so the stopword filter actually removes stopwords.
# NOTE(review): if the model was trained with the previous no-op filter,
# confirm that enabling real stopword removal matches the training pipeline.
_STOPWORDS = frozenset(
    word for row in stpwds_id for word in row if isinstance(word, str)
)

# Scores strictly above this value are reported as "Positive".
POSITIVE_THRESHOLD = 0.3

stemmer = StemmerFactory().create_stemmer()
model = load_model('best_model.H5')
nltk.download('punkt')


def _preprocess(text: str) -> str:
    """Return *text* normalised into the space-joined stemmed tokens fed to the model."""
    # Case folding
    text = text.lower()
    # Mention removal
    text = re.sub("@[A-Za-z0-9_]+", " ", text)
    # Hashtag removal
    text = re.sub("#[A-Za-z0-9_]+", " ", text)
    # Removes the literal two-character sequence backslash + "n". Real newline
    # characters are whitespace and are handled by the tokenizer below.
    text = re.sub(r"\\n", " ", text)
    # Leading/trailing whitespace removal
    text = text.strip()
    # URL removal
    text = re.sub(r"http\S+", " ", text)
    # NOTE(review): the "." is unescaped, so this also matches "www" followed
    # by any character; left unchanged to keep preprocessing stable.
    text = re.sub(r"www.\S+", " ", text)
    # Non-letter removal (emoticons, symbols, digits, ...); keeps letters,
    # whitespace and apostrophes.
    text = re.sub(r"[^A-Za-z\s']", " ", text)

    # Tokenization
    tokens = word_tokenize(text)
    # Stopword removal
    tokens = [word for word in tokens if word not in _STOPWORDS]
    # Stemming
    tokens = [stemmer.stem(word) for word in tokens]
    # Combine tokens back into a single string
    return ' '.join(tokens)


def run() -> None:
    """Render the page and, when "Analyze" is clicked, display the predicted sentiment."""
    st.title("Indonesian Sentiment Analysis App")
    st.image('https://i.pinimg.com/originals/52/ad/6a/52ad6a11c1dcb1692ff9e321bd520167.gif')

    st.subheader("Enter text to analyze sentiment")
    user_input = st.text_area("Input Text", "")

    if not st.button("Analyze"):
        return

    # Do not send blank input to the model; ask for text instead.
    if not user_input.strip():
        st.warning("Please enter some text to analyze.")
        return

    cleaned_text = _preprocess(user_input)

    # assumes model.predict returns a single-element array for one input
    # string -- TODO confirm against the saved model's output shape.
    pred = model.predict([[cleaned_text]])
    sentiment = "Positive" if pred > POSITIVE_THRESHOLD else "Negative"

    st.write(f"Sentiment: {sentiment}")


if __name__ == "__main__":
    run()