Spaces:
Runtime error
Runtime error
batraccoon
committed on
Commit
•
36ceedd
1
Parent(s):
6aa084a
First commit
Browse files- .gitattributes +1 -0
- app.py +27 -0
- best_model.keras +3 -0
- dataset_tweet_sentiment_pilkada_DKI_2017.csv +0 -0
- eda.py +70 -0
- model.py +66 -0
- requirements.txt +12 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
best_model.keras filter=lfs diff=lfs merge=lfs -text
|
app.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Entry point of the Streamlit app: sidebar navigation between pages."""
import streamlit as st

# Streamlit requires set_page_config to be the first Streamlit command that
# runs, so it is issued before any page module is imported.
st.set_page_config(
    page_title="Home page")

navigasi = st.sidebar.selectbox('Page Selector :',
                                ('Home Page', 'Model Prediksi', 'EDA'))

if navigasi == 'Home Page':
    st.header('Home Page')
    st.write('')
    st.image('https://i.pinimg.com/originals/db/05/a0/db05a03b2adcfedd15c8b91e48cb99a4.gif')
    st.write('Graded Challenge 7')
    st.write('Made By : Angger Rizky Firdaus')
    st.write('From : HCK - 012')
    st.write('Project ini adalah membuat project Natural Language Processing untuk Sentiment Analysis')
    st.write('')
    st.caption('Silahkan pilih menu di Select Box pada sebelah kiri layar :) ')
    st.write('')
    st.write('')

elif navigasi == 'Model Prediksi':
    # Imported lazily: model.py loads the Keras model and downloads a
    # stop-word list at import time, which the other pages do not need.
    import model

    model.run()
else:
    # Imported lazily for the same reason: only pay for the page in use.
    import eda

    eda.run()
|
best_model.keras
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b61bcf44e5b2c87025a9c9cb45b6fc0b3085970ded0e0d4f1b00828658458320
|
3 |
+
size 5851481
|
dataset_tweet_sentiment_pilkada_DKI_2017.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
eda.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Import library
import pandas as pd
import numpy as np
import streamlit as st

# Libraries for visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from wordcloud import WordCloud


def run() -> None:
    """Render the exploratory-data-analysis page.

    Reads ``dataset_tweet_sentiment_pilkada_DKI_2017.csv`` from the current
    working directory, shows its first 20 rows, draws a pie chart for the
    column picked in a select box, and shows a word cloud built from the
    ``Text Tweet`` column.
    """
    # Page title
    st.title('Tweet Sentiment Pilkada DKI Jakarta 2017 Dataset : Exploratory Data Analysis')

    # Horizontal separator
    st.markdown('----')

    # Header image and its caption
    st.image('https://images.bisnis.com/posts/2017/02/13/628337/foto.jpg')
    st.text('desc : kertas pemilu kepala daerah DKI Jakarta 2017')
    st.text('img source : bisnis.com')

    # Load the dataset
    df = pd.read_csv('dataset_tweet_sentiment_pilkada_DKI_2017.csv')

    # Preview of the dataframe
    st.subheader('Tweet Sentiment Pilkada DKI Jakarta 2017 Dataframe')
    st.dataframe(df.head(20))
    st.markdown('----')

    st.subheader('Exploratory Data Analysis (EDA)')

    # Interactive pie chart of the selected column
    st.markdown('#### Visualisasi Pie Chart')
    opsi2 = st.selectbox(' Choose the feature : ', ('Pasangan Calon', 'Sentiment'))
    fig, ax = plt.subplots(figsize=(8, 8))
    df[opsi2].value_counts().plot(kind='pie', autopct='%1.1f%%', ax=ax)
    ax.set_title(f'{opsi2} Pie Chart')
    ax.set_ylabel('')
    st.pyplot(fig)
    # Streamlit re-runs this function on every interaction; close the figure
    # so pyplot does not keep one more open figure per rerun.
    plt.close(fig)
    st.text(f'{opsi2} piechart')
    if opsi2 == 'Pasangan Calon':
        st.write('Dataset ini memiliki perbandingan jumlah tweet untuk setiap pasangan calon yang seimbang yaitu pada 33.3% untuk calon Ahok-Djarot, Agus-Sylvi, dan Anies-Sandi.')
    else:
        st.write('Dataset ini memiliki perbandingan jumlah sentimen positive dan negative yang seimbang yaitu 50% untuk sentimen positive dan negative. Dataset yang seimbang akan berdampak baik pada proses training data karena model akan mempelajari data dengan baik. ')

    # Concatenate every tweet into one string for the word cloud.
    # Missing values are dropped and the rest cast to str, because str.join
    # raises TypeError on NaN or other non-string cells.
    all_tweets_text = ' '.join(df['Text Tweet'].dropna().astype(str))

    # Build the word cloud (limited to the 8 most frequent words)
    wordcloud = WordCloud(width=800, height=400, background_color='white', min_font_size=10, max_words=8).generate(all_tweets_text)

    # Show the word cloud in Streamlit
    st.title("WordCloud of Tweets")
    st.image(wordcloud.to_array(), use_column_width=True)
    st.write('Berdasarkan visualisasi wordcloud diatas, berikut adalah 8 kata yang paling banyak muncul pada tweet di dataset ini seperti pilkadadki2017, yang, AHY, AniesSandi, dan, di, AhokDjarot,dan Ahok.')


if __name__ == "__main__":
    # Configure the page only when this file is run directly. Doing it at
    # import time would issue a Streamlit command inside any app that merely
    # imports this module, conflicting with that app's own page config.
    st.set_page_config(
        page_title="Tweet Sentiment Pilkada 2017 EDA")
    run()
|
model.py
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
import pandas as pd
from keras.models import load_model
import numpy as np
import re
import nltk
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from nltk.tokenize import word_tokenize
import tensorflow as tf

# Score above which the input is reported as "Positive" (value unchanged).
POSITIVE_THRESHOLD = 0.3

# word_tokenize needs the "punkt" tokenizer data, which is not shipped with
# the nltk package itself; fetch it once if it is not installed.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

stw = pd.read_csv('https://raw.githubusercontent.com/rizalespe/Dataset-Sentimen-Analisis-Bahasa-Indonesia/master/stopword_tweet_pilkada_DKI_2017.csv')
# stw.values holds one single-element row per stop word. Flatten it into a
# set of plain strings: the previous list-of-lists never compared equal to a
# token, so stop-word removal silently did nothing.
# NOTE(review): confirm the saved model was trained on text with stop words
# actually removed; otherwise inference preprocessing now differs from training.
stpwds_id = {str(word) for word in stw.to_numpy().ravel() if pd.notna(word)}
# 'ada' is added by hand, as before.
# NOTE(review): presumably read_csv consumes it as the header row - confirm.
stpwds_id.add('ada')
stemmer = StemmerFactory().create_stemmer()
model = tf.keras.models.load_model('best_model.keras')


def _preprocess(text: str) -> str:
    """Return *text* cleaned, tokenized, stop-word filtered and stemmed.

    The cleaning steps run in this order: lower-casing, removal of
    @mentions, #hashtags, literal ``\\n`` sequences, URLs and every
    character that is not an ASCII letter, whitespace or apostrophe. The
    remaining tokens are filtered against ``stpwds_id``, stemmed with the
    Sastrawi stemmer and joined with single spaces.
    """
    text = text.lower()                               # case folding
    text = re.sub(r"@[A-Za-z0-9_]+", " ", text)       # mentions
    text = re.sub(r"#[A-Za-z0-9_]+", " ", text)       # hashtags
    text = re.sub(r"\\n", " ", text)                  # literal backslash + "n"
    text = text.strip()                               # outer whitespace
    text = re.sub(r"http\S+", " ", text)              # URLs
    text = re.sub(r"www\.\S+", " ", text)             # URLs without scheme
    text = re.sub(r"[^A-Za-z\s']", " ", text)         # emoji, symbols, digits

    kept = [word for word in word_tokenize(text) if word not in stpwds_id]
    return ' '.join(stemmer.stem(word) for word in kept)


def run() -> None:
    """Render the sentiment-prediction page.

    Shows a text area and an "Analyze" button. When the button is pressed
    the text is preprocessed, passed to the loaded Keras model and the
    result is displayed as "Positive" or "Negative".
    """
    st.title("Indonesian Sentiment Analysis App")
    st.image('https://i.pinimg.com/originals/52/ad/6a/52ad6a11c1dcb1692ff9e321bd520167.gif')
    st.subheader("Enter text to analyze sentiment")

    user_input = st.text_area("Input Text", "")
    if st.button("Analyze"):
        # Do not send blank input to the model.
        if not user_input.strip():
            st.warning("Please enter some text to analyze.")
            return

        cleaned = _preprocess(user_input)

        pred = model.predict([[cleaned]])
        # Compare a plain float instead of relying on the truth value of an
        # array. Assumes the model emits one score per input - TODO confirm.
        score = float(np.asarray(pred).ravel()[0])
        sentiment = "Positive" if score > POSITIVE_THRESHOLD else "Negative"
        st.write(f"Sentiment: {sentiment}")


if __name__ == "__main__":
    # Configure the page only when this file is run directly. Doing it at
    # import time would issue a Streamlit command inside any app that merely
    # imports this module, conflicting with that app's own page config.
    st.set_page_config(
        page_title="Indonesian Sentiment Analysis")
    run()
|
requirements.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
seaborn
|
2 |
+
pandas
|
3 |
+
matplotlib
|
4 |
+
pickleshare
|
5 |
+
tensorflow == 2.14.0
|
6 |
+
keras == 2.14.0
|
7 |
+
numpy
|
8 |
+
plotly
|
9 |
+
PySastrawi == 1.2.0
|
10 |
+
nltk == 3.8.1
|
11 |
+
# re is part of the Python standard library and must not be listed as a pip requirement
|
12 |
+
wordcloud
|