|
import streamlit as st |
|
import joblib |
|
import datetime |
|
import snscrape.modules.twitter as sntwitter |
|
import pandas as pd |
|
from tensorflow.keras.models import load_model |
|
import nltk |
|
import re |
|
import tensorflow as tf |
|
import numpy as np |
|
from nltk.corpus import stopwords |
|
from nltk.tokenize import word_tokenize |
|
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory |
|
import matplotlib.pyplot as plt |
|
import seaborn as sns |
|
|
|
# Fetch the NLTK resources at import time. 'stopwords' backs the
# stopwords.words('indonesian') call in run(); 'punkt' is presumably what
# word_tokenize needs. 'wordnet' and 'omw-1.4' are not referenced by the code
# visible in this file but are still downloaded, as before.
for _resource in ('stopwords', 'punkt', 'wordnet', 'omw-1.4'):
    nltk.download(_resource)
del _resource  # keep the module namespace the same as with four explicit calls

# Load the saved Keras model once when the module is imported; run() reads it
# through the module-level name `nlp`.
nlp = load_model('model_lstm.tf')
|
|
|
# Column names of the scraped-tweet table, in the order the rows are built.
_TWEET_COLUMNS = ["User", "Date Created", "Number of Likes", "Source of Tweet", "Tweet"]

# Class index produced by np.argmax over the model output -> display label.
_SENTIMENT_LABELS = {0: 'positive', 1: 'neutral', 2: 'negative'}


def _fetch_tweets(stock):
    """Scrape tweets mentioning *stock* from yesterday up to today.

    Args:
        stock: Ticker symbol as chosen in the select box (e.g. ``'BBNI'``).

    Returns:
        A DataFrame with the columns in ``_TWEET_COLUMNS``, restricted to
        tweets whose text contains *stock*, with ``'Date Created'`` reduced
        to a plain ``datetime.date``.
    """
    # Take "today" once so both ends of the window come from the same instant.
    today = datetime.date.today()
    yesterday = today - datetime.timedelta(days=1)
    query = f"{stock.lower()} since:{yesterday:%Y-%m-%d} until:{today:%Y-%m-%d}"

    rows = [
        [tweet.user.username, tweet.date, tweet.likeCount, tweet.sourceLabel, tweet.content]
        for tweet in sntwitter.TwitterSearchScraper(query).get_items()
    ]
    tweets_df = pd.DataFrame(rows, columns=_TWEET_COLUMNS)

    # NOTE(review): the search uses the lower-cased ticker but this filter is
    # case-sensitive on the ticker as given, so lower-case mentions are
    # dropped -- confirm that is intended.
    mentions_stock = tweets_df['Tweet'].str.contains(stock)

    # .copy() makes the filtered frame independent of tweets_df, so the column
    # assignment below does not trigger pandas' SettingWithCopyWarning.
    tweets = tweets_df.loc[mentions_stock].copy()

    # Drop the timezone, then keep only the calendar date.
    created = pd.to_datetime(tweets['Date Created']).dt.tz_localize(None)
    tweets['Date Created'] = created.dt.date
    return tweets


def _clean_tweet(text, stop_words, stemmer):
    """Normalise one tweet into the space-joined token string fed to the model.

    Args:
        text: Raw tweet text.
        stop_words: Container of tokens to discard (membership-tested).
        stemmer: Object exposing ``stem(word)``.

    Returns:
        The lower-cased, cleaned, stop-word-filtered and stemmed text.
    """
    # NOTE(review): the substitutions below are kept behaviourally identical
    # to the previous implementation; presumably the model was trained on text
    # prepared the same way -- confirm before changing any of them.
    text = text.lower()
    text = re.sub("@[A-Za-z0-9_]+", " ", text)   # @mentions
    text = re.sub("#[A-Za-z0-9_]+", " ", text)   # #hashtags
    # Matches a literal backslash followed by "n", not a newline character.
    text = re.sub(r"\\n", " ", text)
    text = text.strip()
    text = re.sub(r"http\S+", " ", text)
    # The "." is unescaped, so it matches any character after "www".
    text = re.sub(r"www.\S+", " ", text)
    # Raw string: the earlier plain literal relied on the invalid "\s" escape.
    text = re.sub(r"[^A-Za-z\s']", " ", text)
    # Removes every "rt" substring, including inside words. Possibly meant to
    # drop only the retweet marker (r"\brt\b") -- left as is, see note above.
    text = re.sub("rt", " ", text)

    tokens = word_tokenize(text)
    # Tokens contain no whitespace, so filtering and stemming them directly is
    # equivalent to joining, re-splitting and then stemming.
    return ' '.join(stemmer.stem(word) for word in tokens if word not in stop_words)


def _show_label_pie(labels):
    """Render a pie chart of how often each value occurs in *labels*.

    Args:
        labels: pandas Series of category values.
    """
    # Slices follow the order in which values first appear in the series.
    names = list(labels.unique())
    sizes = [int((labels == name).sum()) for name in names]

    fig = plt.figure(figsize=(15, 5))
    plt.pie(sizes, labels=names, colors=sns.color_palette('pastel'), autopct='%.0f%%')
    st.pyplot(fig)
    # Figures made through pyplot stay registered until closed; release this
    # one so repeated reruns do not accumulate open figures.
    plt.close(fig)


def run():
    """Build the Streamlit page: pick a stock, list its tweets, predict sentiment.

    Scrapes the tweets for the selected ticker, shows them as a table and, when
    the 'Predict sentiment' button is pressed, classifies each tweet with the
    module-level model ``nlp`` and displays the label distribution as a pie
    chart.
    """
    stock = st.selectbox('Pick a stock:', ('BBNI', 'BBRI', 'BBTN', 'BMRI'))

    tweets = _fetch_tweets(stock)
    # Explicit call instead of a bare `tweets` expression, which is rendered
    # only through Streamlit's "magic" handling of bare expressions.
    st.write(tweets)

    if st.button('Predict sentiment'):
        if tweets.empty:
            # Without this guard an empty series is handed to the model.
            st.warning('No tweets found for this stock; nothing to predict.')
            return

        # A set gives constant-time stop-word membership tests.
        stop_words = set(stopwords.words('indonesian'))
        stemmer = StemmerFactory().create_stemmer()

        processed = tweets['Tweet'].apply(
            lambda tweet_text: _clean_tweet(tweet_text, stop_words, stemmer)
        ).rename('Tweet_processed')

        # Highest-scoring class index per tweet.
        pred = np.argmax(nlp.predict(processed), axis=-1)
        labels = pd.Series(pred, name='label').replace(_SENTIMENT_LABELS)

        _show_label_pie(labels)
|
|
|
# Build the page only when this file is executed as the main script,
# not when it is imported by another module.
if __name__ == "__main__":
    run()