batraccoon committed on
Commit
36ceedd
1 Parent(s): 6aa084a

First commit

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ best_model.keras filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import eda
3
+ import model
4
+
5
+
6
# Page configuration is issued before any other Streamlit command in this
# script; the page modules imported above should not emit Streamlit commands
# at import time.
st.set_page_config(page_title="Home page")

# Sidebar selector deciding which page is rendered on this run.
navigasi = st.sidebar.selectbox(
    'Page Selector :',
    ('Home Page', 'Model Prediksi', 'EDA'),
)


def _render_home():
    """Draw the landing page: header, banner image, credits and a usage hint."""
    st.header('Home Page')
    st.write('')
    st.image('https://i.pinimg.com/originals/db/05/a0/db05a03b2adcfedd15c8b91e48cb99a4.gif')
    # Credit lines followed by one empty spacer, written in order.
    for line in (
        'Graded Challenge 7',
        'Made By : Angger Rizky Firdaus',
        'From : HCK - 012',
        'Project ini adalah membuat project Natural Language Processing untuk Sentiment Analysis',
        '',
    ):
        st.write(line)
    st.caption('Silahkan pilih menu di Select Box pada sebelah kiri layar :) ')
    # Two trailing spacers below the caption.
    st.write('')
    st.write('')


# Map each explicit selection to its renderer; any other value (i.e. 'EDA')
# falls through to the EDA page.
_PAGES = {
    'Home Page': _render_home,
    'Model Prediksi': model.run,
}
_PAGES.get(navigasi, eda.run)()
best_model.keras ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b61bcf44e5b2c87025a9c9cb45b6fc0b3085970ded0e0d4f1b00828658458320
3
+ size 5851481
dataset_tweet_sentiment_pilkada_DKI_2017.csv ADDED
The diff for this file is too large to render. See raw diff
 
eda.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #Import library
2
+ import pandas as pd
3
+ import numpy as np
4
+ import streamlit as st
5
+
6
+ #library for visualization
7
+ import matplotlib.pyplot as plt
8
+ import seaborn as sns
9
+ import plotly.express as px
10
+ from wordcloud import WordCloud
11
+
12
def run():
    """Render the exploratory-data-analysis page.

    Reads ``dataset_tweet_sentiment_pilkada_DKI_2017.csv`` from the working
    directory, shows its first 20 rows, draws a pie chart for the column the
    user picks ('Pasangan Calon' or 'Sentiment') and a word cloud built from
    the 'Text Tweet' column.

    The explanatory paragraphs under each chart are fixed strings; they are
    not recomputed from the data.

    Page configuration is intentionally NOT done here or at import time, so
    this module can be imported by a host app that sets its own page config.
    """
    # Page title
    st.title('Tweet Sentiment Pilkada DKI Jakarta 2017 Dataset : Exploratory Data Analysis')

    # Horizontal separator
    st.markdown('----')

    # Header image with caption and source
    st.image('https://images.bisnis.com/posts/2017/02/13/628337/foto.jpg')
    st.text('desc : kertas pemilu kepala daerah DKI Jakarta 2017')
    st.text('img source : bisnis.com')

    # Load dataset
    df = pd.read_csv('dataset_tweet_sentiment_pilkada_DKI_2017.csv')

    # Show the first rows of the dataframe
    st.subheader('Tweet Sentiment Pilkada DKI Jakarta 2017 Dataframe')
    st.dataframe(df.head(20))
    st.markdown('----')

    st.subheader('Exploratory Data Analysis (EDA)')

    # Interactive pie chart of the selected categorical column
    st.markdown('#### Visualisasi Pie Chart')
    opsi2 = st.selectbox(' Choose the feature : ', ('Pasangan Calon', 'Sentiment'))
    fig, ax = plt.subplots(figsize=(8, 8))
    df[opsi2].value_counts().plot(kind='pie', autopct='%1.1f%%', ax=ax)
    ax.set_title(f'{opsi2} Pie Chart')
    ax.set_ylabel('')
    st.pyplot(fig)
    # Release the figure: pyplot keeps every figure it creates registered
    # until it is closed, and this function runs again on every interaction.
    plt.close(fig)
    st.text(f'{opsi2} piechart')
    if opsi2 == 'Pasangan Calon':
        st.write('Dataset ini memiliki perbandingan jumlah tweet untuk setiap pasangan calon yang seimbang yaitu pada 33.3% untuk calon Ahok-Djarot, Agus-Sylvi, dan Anies-Sandi.')
    else:
        st.write('Dataset ini memiliki perbandingan jumlah sentimen positive dan negative yang seimbang yaitu 50% untuk sentimen positive dan negative. Dataset yang seimbang akan berdampak baik pada proses training data karena model akan mempelajari data dengan baik. ')

    # Word cloud over all tweet texts. Missing values are dropped and every
    # entry is cast to str so that str.join cannot fail on NaN/non-string cells.
    all_tweets_text = ' '.join(df['Text Tweet'].dropna().astype(str))

    # Build the word cloud (limited to 8 words, matching the text below)
    wordcloud = WordCloud(width=800, height=400, background_color='white', min_font_size=10, max_words=8).generate(all_tweets_text)

    # Display the word cloud
    st.title("WordCloud of Tweets")
    st.image(wordcloud.to_array(), use_column_width=True)
    st.write('Berdasarkan visualisasi wordcloud diatas, berikut adalah 8 kata yang paling banyak muncul pada tweet di dataset ini seperti pilkadadki2017, yang, AHY, AniesSandi, dan, di, AhokDjarot,dan Ahok.')


if __name__ == "__main__":
    # Only configure the page when this file is the Streamlit entry point;
    # doing it at import time would issue a Streamlit command before the
    # importing app gets to set its own page config.
    st.set_page_config(
        page_title="Tweet Sentiment Pilkada 2017 EDA")
    run()
model.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from keras.models import load_model
4
+ import numpy as np
5
+ import re
6
+ from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
7
+ from nltk.tokenize import word_tokenize
8
+ import tensorflow as tf
9
+
10
def _ensure_punkt():
    """Download the NLTK 'punkt' tokenizer data if it is not installed.

    ``word_tokenize`` raises ``LookupError`` when this resource is missing,
    which is the case on a freshly built environment.
    """
    import nltk  # local import: only the tokenizer is imported at file level

    try:
        nltk.data.find('tokenizers/punkt')
    except LookupError:
        nltk.download('punkt', quiet=True)


_ensure_punkt()

# Stopword list published with the dataset, plus the extra entry 'ada'.
stw = pd.read_csv('https://raw.githubusercontent.com/rizalespe/Dataset-Sentimen-Analisis-Bahasa-Indonesia/master/stopword_tweet_pilkada_DKI_2017.csv')
stpwds_id = stw.values.tolist()
stpwds_id.append(['ada'])
# `stpwds_id` is a list of one-element rows, so testing `word in stpwds_id`
# with a plain string never matches. Flatten it into a set of strings for the
# actual lookup.
# NOTE(review): this makes stopword removal effective at inference time —
# confirm the training pipeline filtered stopwords the same way.
_STOPWORDS = frozenset(
    word for row in stpwds_id for word in row if isinstance(word, str)
)

stemmer = StemmerFactory().create_stemmer()
model = tf.keras.models.load_model('best_model.keras')

# Cleaning patterns, compiled once. Raw strings avoid invalid-escape warnings.
_MENTION_RE = re.compile(r"@[A-Za-z0-9_]+")
_HASHTAG_RE = re.compile(r"#[A-Za-z0-9_]+")
# Matches the two-character sequence backslash + 'n' (an escaped newline
# embedded in the text); real newline characters are handled as whitespace
# by the tokenizer.
_ESCAPED_NEWLINE_RE = re.compile(r"\\n")
_HTTP_URL_RE = re.compile(r"http\S+")
_WWW_URL_RE = re.compile(r"www\.\S+")
_NON_LETTER_RE = re.compile(r"[^A-Za-z\s']")


def _preprocess(text):
    """Clean one raw text and return it as a space-joined string of stems.

    Steps, in order: lowercase, strip mentions, hashtags, escaped newlines,
    surrounding whitespace and URLs, drop every character that is not an
    ASCII letter, whitespace or apostrophe, tokenize, remove stopwords, stem.
    """
    text = text.lower()
    text = _MENTION_RE.sub(" ", text)
    text = _HASHTAG_RE.sub(" ", text)
    text = _ESCAPED_NEWLINE_RE.sub(" ", text)
    text = text.strip()
    text = _HTTP_URL_RE.sub(" ", text)
    text = _WWW_URL_RE.sub(" ", text)
    text = _NON_LETTER_RE.sub(" ", text)
    tokens = word_tokenize(text)
    stems = [stemmer.stem(tok) for tok in tokens if tok not in _STOPWORDS]
    return ' '.join(stems)


def run():
    """Render the sentiment-prediction page.

    Shows a text area and an "Analyze" button. On click, the input is cleaned
    with ``_preprocess``, passed to the loaded Keras model, and the result is
    reported as "Positive" when the prediction exceeds 0.3, otherwise
    "Negative". Blank input produces a warning instead of a prediction.

    Page configuration is intentionally NOT done here or at import time, so
    this module can be imported by a host app that sets its own page config.
    """
    st.title("Indonesian Sentiment Analysis App")
    st.image('https://i.pinimg.com/originals/52/ad/6a/52ad6a11c1dcb1692ff9e321bd520167.gif')
    st.subheader("Enter text to analyze sentiment")

    user_input = st.text_area("Input Text", "")
    if not st.button("Analyze"):
        return

    # Nothing meaningful to classify: avoid sending an empty string to the model.
    if not user_input.strip():
        st.warning("Please enter some text to analyze.")
        return

    cleaned = _preprocess(user_input)

    # assumes the model yields a single score per input — TODO confirm
    pred = model.predict([[cleaned]])
    if pred > 0.3:
        sentiment = "Positive"
    else:
        sentiment = "Negative"
    st.write(f"Sentiment: {sentiment}")


if __name__ == "__main__":
    # Only configure the page when this file is the Streamlit entry point;
    # doing it at import time would issue a Streamlit command before the
    # importing app gets to set its own page config.
    st.set_page_config(
        page_title="Indonesian Sentiment Analysis")
    run()
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ seaborn
2
+ pandas
3
+ matplotlib
4
+ pickleshare
5
+ tensorflow == 2.14.0
6
+ keras == 2.14.0
7
+ numpy
8
+ plotly
9
+ PySastrawi == 1.2.0
10
+ nltk == 3.8.1
11
+ # re is part of the Python standard library; it is not a pip-installable requirement
12
+ wordcloud