# batraccoon's picture
# Update model.py
# ecf417a verified
import streamlit as st
import pandas as pd
from keras.models import load_model
import numpy as np
import re
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
import nltk
from nltk.tokenize import word_tokenize
import tensorflow as tf
# Page metadata for the Streamlit app. Issued before any other Streamlit
# call in this module.
st.set_page_config(page_title="Indonesian Sentiment Analysis")

# Indonesian stopword list, fetched from GitHub when the module is loaded.
stw = pd.read_csv('https://raw.githubusercontent.com/rizalespe/Dataset-Sentimen-Analisis-Bahasa-Indonesia/master/stopword_tweet_pilkada_DKI_2017.csv')

# DataFrame.values.tolist() yields one list per CSV row (e.g. [['yang'], ...]).
# Flatten it into a set of plain strings so that `token in stpwds_id` actually
# matches string tokens; against a list of lists the membership test never
# succeeds and stopword removal silently does nothing.
stpwds_id = {
    word.strip()
    for row in stw.values.tolist()
    for word in row
    if isinstance(word, str)  # skip empty/NaN cells
}
# 'ada' is added by hand.
# NOTE(review): presumably because read_csv consumes the first CSV line as the
# header row, dropping that word from the data -- confirm against the file.
stpwds_id.add('ada')

# Sastrawi stemmer for Indonesian, shared by every prediction.
stemmer = StemmerFactory().create_stemmer()

# Trained Keras model, loaded from the working directory.
model = load_model('best_model.H5')

# Tokenizer data required by nltk.tokenize.word_tokenize.
nltk.download('punkt')
def _clean_text(raw_text):
    """Return *raw_text* lower-cased and stripped of tweet noise.

    Applied in order: case folding, @mention removal, #hashtag removal,
    newline removal, outer-whitespace trimming, URL removal, and finally
    replacement of every character that is not an ASCII letter, whitespace
    or an apostrophe with a space.
    """
    text = raw_text.lower()
    text = re.sub(r"@[A-Za-z0-9_]+", " ", text)
    text = re.sub(r"#[A-Za-z0-9_]+", " ", text)
    # Covers both a literal backslash-n pair and a real newline character.
    text = re.sub(r"\\n|\n", " ", text)
    text = text.strip()
    text = re.sub(r"http\S+", " ", text)
    # The dot is escaped so only "www." prefixes are treated as URLs.
    text = re.sub(r"www\.\S+", " ", text)
    # Raw string: "\s" in a normal string literal is an invalid escape.
    return re.sub(r"[^A-Za-z\s']", " ", text)


def _as_stopword_set(entries):
    """Return the stopwords in *entries* as a flat set of strings.

    Accepts either plain strings or one-word-per-row lists (the shape
    produced by ``DataFrame.values.tolist()``), so membership tests against
    string tokens work in both cases.
    """
    words = set()
    for entry in entries:
        if isinstance(entry, str):
            words.add(entry)
        else:
            words.update(item for item in entry if isinstance(item, str))
    return words


def run():
    """Render the sentiment-analysis page and classify the submitted text.

    Shows a title, an image and a text area. When the "Analyze" button is
    pressed, the input is cleaned, tokenized, filtered against ``stpwds_id``,
    stemmed, re-joined into one string and passed to ``model.predict``.
    A score above 0.3 is reported as "Positive", anything else as
    "Negative". Returns ``None``; all output goes through Streamlit.
    """
    st.title("Indonesian Sentiment Analysis App")
    st.image('https://i.pinimg.com/originals/52/ad/6a/52ad6a11c1dcb1692ff9e321bd520167.gif')
    st.subheader("Enter text to analyze sentiment")
    user_input = st.text_area("Input Text", "")

    if not st.button("Analyze"):
        return

    cleaned = _clean_text(user_input)

    # NOTE(review): stopwords are now really removed (previously the filter
    # never matched) -- confirm the training pipeline did the same.
    stopwords = _as_stopword_set(stpwds_id)
    kept_tokens = [word for word in word_tokenize(cleaned) if word not in stopwords]
    stemmed_tokens = [stemmer.stem(word) for word in kept_tokens]
    prepared = ' '.join(stemmed_tokens)

    # Nothing meaningful left to classify (blank input, or only mentions,
    # URLs, symbols and stopwords).
    if not prepared:
        st.warning("Please enter some text to analyze.")
        return

    # Same nested-list input shape as before: one sample holding one string.
    pred = model.predict([[prepared]])
    # Take the single score explicitly instead of relying on the truthiness
    # of a one-element array comparison.
    score = float(np.ravel(pred)[0])
    sentiment = "Positive" if score > 0.3 else "Negative"
    st.write(f"Sentiment: {sentiment}")
# Render the app only when this file is executed as the main script;
# importing it as a module does not call run().
if __name__ == "__main__":
    run()