Nakhwa committed on
Commit
696314f
1 Parent(s): 7cb4f6a

Upload 10 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ mafindo_mix_llm.csv filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import streamlit as st

# Set page configuration (must be the first Streamlit call in the script)
st.set_page_config(page_title="Hoax Detection Dashboard", layout="wide")
st.title("Dashboard Deteksi Berita Hoax")

from home import show_home
# BUGFIX: the page modules define show_deteksi_kontengcs / show_deteksi_uploadgcs;
# the original imports referenced non-existent names (show_deteksi_konten /
# show_deteksi_upload) and crashed with ImportError at startup. Alias the real
# functions to the names used below so the rest of the script is unchanged.
from deteksi_content import show_deteksi_kontengcs as show_deteksi_konten
from deteksi_upload import show_deteksi_uploadgcs as show_deteksi_upload

# Create tabs, one per dashboard page
tab1, tab2, tab3 = st.tabs(["Home", "Deteksi Konten", "Deteksi File"])

with tab1:
    show_home()

with tab2:
    show_deteksi_konten()

with tab3:
    show_deteksi_upload()
deteksi_content.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from datetime import datetime
3
+ import pandas as pd
4
+ from lime.lime_text import LimeTextExplainer
5
+ from test import predict_hoax, predict_proba_for_lime
6
+ import streamlit.components.v1 as components
7
+ from load_model import load_model
8
+ from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
9
+ from styles import COMMON_CSS
10
+ from google.cloud import storage
11
+ import os
12
+ from io import StringIO
13
+
14
# Set environment variable for Google Cloud credentials.
# BUGFIX: the original string used backslashes ("D:\DashboardHoax\i...") which
# are invalid escape sequences in a non-raw string literal (deprecated, and
# fragile if the path ever contains \n, \t, ...). Use forward slashes, matching
# the identical line in deteksi_upload.py.
# NOTE(review): hard-coding an absolute local path to a committed key file is
# insecure — prefer injecting credentials via deployment configuration.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "D:/DashboardHoax/inbound-source-431806-g7-e49e388ce0be.json"
16
+
17
def save_corrections_to_gcs(bucket_name, file_name, correction_data):
    """Append user-correction rows to a CSV object in Google Cloud Storage.

    Performs a read-modify-write: downloads the existing CSV (if any), appends
    the new rows, and re-uploads the whole file.

    NOTE(review): the bucket_name and file_name parameters are IGNORED — the
    bucket ("dashboardhoax-bucket") and object ("koreksi_pengguna_content.csv")
    are hard-coded below. Confirm whether the parameters should be used.

    Parameters:
        bucket_name: unused (see note above).
        file_name: unused (see note above).
        correction_data: list of dicts, one per correction row; keys are
            expected to match the CSV columns created below.
    """
    client = storage.Client()  # Uses the credentials set by the environment variable
    bucket = client.bucket("dashboardhoax-bucket")
    blob = bucket.blob("koreksi_pengguna_content.csv")

    # Check if the blob (file) exists
    if blob.exists():
        # Download existing CSV from GCS
        existing_data = blob.download_as_string().decode('utf-8')
        existing_df = pd.read_csv(StringIO(existing_data))
    else:
        # Create a new (empty) DataFrame if the file does not exist yet
        existing_df = pd.DataFrame(columns=['Timestamp', 'Title', 'Content', 'Prediction', 'Correction'])

    # Append the new data to the existing data
    new_data_df = pd.DataFrame(correction_data)
    updated_df = pd.concat([existing_df, new_data_df], ignore_index=True)

    # Convert the DataFrame back to CSV and upload (overwrites the object)
    updated_csv_data = updated_df.to_csv(index=False)
    blob.upload_from_string(updated_csv_data, content_type='text/csv')
38
+
39
def show_deteksi_kontengcs():
    """Render the "Deteksi Konten" tab: single-article hoax detection with a
    LIME explanation and an optional user-correction flow saved to GCS.

    Relies on st.session_state to keep inputs and results across Streamlit
    reruns (every widget interaction re-executes this function).
    """
    st.markdown(COMMON_CSS, unsafe_allow_html=True)

    # Initialise session-state slots so they survive reruns.
    if 'correction' not in st.session_state:
        st.session_state.correction = None
    if 'detection_result' not in st.session_state:
        st.session_state.detection_result = None
    if 'lime_explanation' not in st.session_state:
        st.session_state.lime_explanation = None
    if 'headline' not in st.session_state:
        st.session_state.headline = ""
    if 'content' not in st.session_state:
        st.session_state.content = ""
    if 'is_correct' not in st.session_state:
        st.session_state.is_correct = None

    # Dropdown for selecting a model
    st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Pilih Model</h6>", unsafe_allow_html=True)
    selected_model = st.selectbox(
        "",
        [
            "cahya/bert-base-indonesian-522M",
            "indobenchmark/indobert-base-p2",
            "indolem/indobert-base-uncased",
            "mdhugol/indonesia-bert-sentiment-classification"
        ],
        key="model_selector_content"
    )

    # Load the selected model (cached in load_model).
    # NOTE(review): tokenizer/model are loaded here but never passed to
    # predict_hoax below — presumably predict_hoax uses its own globals;
    # confirm against test.py.
    tokenizer, model = load_model(selected_model)

    st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Masukkan Judul Berita :</h6>", unsafe_allow_html=True)
    st.session_state.headline = st.text_input("", value=st.session_state.headline)

    st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Masukkan Konten Berita :</h6>", unsafe_allow_html=True)
    st.session_state.content = st.text_area("", value=st.session_state.content)

    # Detection button: classify and build a LIME explanation.
    if st.button("Deteksi", key="detect_content"):
        st.session_state.detection_result = predict_hoax(st.session_state.headline, st.session_state.content)
        st.success(f"Prediksi: {st.session_state.detection_result}")

        # Prepare the text for LIME ("[SEP]" joins headline and body,
        # BERT-style single-sequence input)
        lime_texts = [f"{st.session_state.headline} [SEP] {st.session_state.content}"]

        # Add a spinner to indicate processing (LIME runs num_samples model calls)
        with st.spinner("Sedang memproses LIME, harap tunggu..."):
            # Explain the prediction
            explainer = LimeTextExplainer(class_names=['NON-HOAX', 'HOAX'])
            explanation = explainer.explain_instance(lime_texts[0], predict_proba_for_lime, num_features=5, num_samples=1000)

            # Save the LIME explanation in session state so it survives reruns
            st.session_state.lime_explanation = explanation.as_html()

    # Display the LIME explanation if available (persists across reruns).
    if st.session_state.lime_explanation:
        lime_html = st.session_state.lime_explanation

        # Inject CSS for font size adjustment inside the LIME iframe
        lime_html = f"""
        <style>
            .lime-text-explanation, .lime-highlight, .lime-classification,
            .lime-text-explanation * {{
                font-size: 14px !important;
            }}
        </style>
        <div class="lime-text-explanation">
            {lime_html}
        </div>
        """
        components.html(lime_html, height=200, scrolling=True)

    # Ask the user whether the detection result is correct.
    if st.session_state.detection_result is not None:
        st.markdown("<h6 style='font-size: 16px; margin-bottom: -150px;'>Apakah hasil deteksi sudah benar?</h6>", unsafe_allow_html=True)
        st.session_state.is_correct = st.radio("", ("Ya", "Tidak"))

        if st.session_state.is_correct == "Ya":
            st.success("Deteksi sudah benar.")
        else:
            # The user disagreed: the correction is the opposite label.
            st.session_state.correction = "HOAX" if st.session_state.detection_result == "NON-HOAX" else "NON-HOAX"

            # Build the single correction row to persist.
            correction_data = [{
                'Title': st.session_state.headline,
                'Content': st.session_state.content,
                'Prediction': st.session_state.detection_result,
                'Correction': st.session_state.correction,
                'Timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            }]

            # Save button
            if st.button("Simpan"):
                # Save the correction data to GCS.
                # NOTE(review): both arguments are ignored by
                # save_corrections_to_gcs, which hard-codes its own
                # bucket/object names.
                save_corrections_to_gcs("your-bucket-name", "koreksi_pengguna.csv", correction_data)

                # Create a formatted string with CSS for alignment and multi-line content handling
                formatted_text = f"""
                <div style='font-size: 14px;'>
                    <p style='margin: 0;'><span style='display: inline-block; width: 120px; font-weight: bold;'>Title</span> : <span style='white-space: pre-wrap;'>{st.session_state.headline}</span></p>
                    <p style='margin: 0;'><span style='display: inline-block; width: 120px; font-weight: bold;'>Content</span> : <span style='white-space: pre-wrap;'>{st.session_state.content}</span></p>
                    <p style='margin: 0;'><span style='display: inline-block; width: 120px; font-weight: bold;'>Prediction</span> : {st.session_state.detection_result}</p>
                    <p style='margin: 0;'><span style='display: inline-block; width: 120px; font-weight: bold;'>Correction</span> : {st.session_state.correction}</p>
                </div>
                """

                # Display the saved correction as text
                st.markdown(formatted_text, unsafe_allow_html=True)
                st.success("Koreksi telah disimpan.")
deteksi_upload.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
4
+ from test import predict_hoax, evaluate_model_performance
5
+ from load_model import load_model
6
+ from styles import COMMON_CSS
7
+ from google.cloud import storage
8
+ from io import StringIO
9
+ import os
10
+ from datetime import datetime
11
+
12
# Set environment variable for Google Cloud credentials.
# NOTE(review): hard-coding an absolute local path to a committed service
# account key file is insecure — prefer injecting credentials via deployment
# configuration and rotating the exposed key.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "D:/DashboardHoax/inbound-source-431806-g7-e49e388ce0be.json"
14
+
15
def save_corrections_to_gcs(bucket_name, file_name, correction_data):
    """Append batch-correction rows to a CSV object in Google Cloud Storage.

    Read-modify-write: downloads the existing CSV (if any), appends the new
    rows, and re-uploads the whole file.

    NOTE(review): the bucket_name and file_name parameters are IGNORED — the
    bucket ("dashboardhoax-bucket") and object ("koreksi_pengguna_file.csv")
    are hard-coded below. Confirm whether the parameters should be used.

    Parameters:
        bucket_name: unused (see note above).
        file_name: unused (see note above).
        correction_data: list of dicts, one per corrected row; keys are
            expected to match the CSV columns created below.
    """
    client = storage.Client()
    bucket = client.bucket("dashboardhoax-bucket")
    blob = bucket.blob("koreksi_pengguna_file.csv")

    # Check if the blob (file) exists
    if blob.exists():
        # Download existing CSV from GCS
        existing_data = blob.download_as_string().decode('utf-8')
        existing_df = pd.read_csv(StringIO(existing_data))
    else:
        # Create a new (empty) DataFrame if the file does not exist yet
        existing_df = pd.DataFrame(columns=['Timestamp', 'Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource', 'Result_Detection', 'Result_Correction'])

    # Append the new data to the existing data
    new_data_df = pd.DataFrame(correction_data)
    updated_df = pd.concat([existing_df, new_data_df], ignore_index=True)

    # Convert the DataFrame back to CSV and upload (overwrites the object)
    updated_csv_data = updated_df.to_csv(index=False)
    blob.upload_from_string(updated_csv_data, content_type='text/csv')
36
+
37
def load_data(file):
    """Parse an uploaded CSV file-like object into a pandas DataFrame."""
    frame = pd.read_csv(file)
    return frame
39
+
40
def show_deteksi_uploadgcs():
    """Render the "Deteksi File" tab: batch hoax detection over an uploaded
    CSV, inline correction through an editable AgGrid, and saving the
    corrected rows to GCS.

    State is kept in st.session_state.df across Streamlit reruns.
    """
    st.markdown(COMMON_CSS, unsafe_allow_html=True)

    # Model picker (same model list as the single-article tab).
    st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Pilih Model</h6>", unsafe_allow_html=True)
    selected_model = st.selectbox(
        "",
        [
            "cahya/bert-base-indonesian-522M",
            "indobenchmark/indobert-base-p2",
            "indolem/indobert-base-uncased",
            "mdhugol/indonesia-bert-sentiment-classification"
        ],
        key="model_selector_upload"
    )

    # Cached model load; tokenizer/model are also used for the metrics below.
    tokenizer, model = load_model(selected_model)

    st.markdown("<h6 style='font-size: 14px; margin-bottom: -200px;'>Unggah File Disini</h6>", unsafe_allow_html=True)
    uploaded_file = st.file_uploader("", type="csv")

    # Slot for the working DataFrame, persisted across reruns.
    if 'df' not in st.session_state:
        st.session_state.df = None

    if uploaded_file is not None:
        df = load_data(uploaded_file)
        df.index = df.index + 1  # 1-based index for display

        st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Data yang Diunggah</h6>", unsafe_allow_html=True)

        # Read-only preview grid of the uploaded data.
        grid_options = GridOptionsBuilder.from_dataframe(df)
        grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
        gridOptions = grid_options.build()

        AgGrid(
            df,
            gridOptions=gridOptions,
            update_mode=GridUpdateMode.VALUE_CHANGED,
            use_container_width=True
        )

        # Run detection row-by-row; expects 'Title' and 'Content' columns.
        if st.button("Deteksi", key="detect_upload"):
            try:
                df['Result_Detection'] = df.apply(lambda row: predict_hoax(row['Title'], row['Content']), axis=1)
                df['Correction'] = False  # user toggles this in the editable grid
                st.session_state.df = df.copy()
            except Exception as e:
                st.error(f"Terjadi kesalahan saat deteksi: {e}")

    if st.session_state.df is not None:

        # Evaluate model quality against the uploaded labels.
        accuracy, precision, recall, f1 = evaluate_model_performance(st.session_state.df, tokenizer, model)
        performance_text = (
            f"*Performansi Model*\n\n"
            f"*Accuracy:* {round(accuracy, 2)}&nbsp;&nbsp;"
            f"*Precision:* {round(precision, 2)}&nbsp;&nbsp;"
            f"*Recall:* {round(recall, 2)}&nbsp;&nbsp;"
            f"*F1 Score:* {round(f1, 2)}"
        )

        st.success(performance_text)

        st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Hasil Deteksi</h6>", unsafe_allow_html=True)

        # Put Correction / Result_Detection first for visibility; hide Label_id.
        cols = ['Correction', 'Result_Detection'] + [col for col in st.session_state.df.columns if col not in ['Correction', 'Result_Detection', 'Label_id']]
        df_reordered = st.session_state.df[cols]

        grid_options = GridOptionsBuilder.from_dataframe(df_reordered)
        grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
        grid_options.configure_default_column(editable=True, groupable=True)
        gridOptions = grid_options.build()

        # Editable grid: the user ticks 'Correction' on mislabeled rows.
        # NOTE(review): the grid is fed st.session_state.df, not df_reordered
        # built above — confirm whether the reordered view was intended.
        grid_response = AgGrid(
            st.session_state.df,
            gridOptions=gridOptions,
            update_mode=GridUpdateMode.VALUE_CHANGED
        )

        if grid_response['data'] is not None:
            edited_df = pd.DataFrame(grid_response['data'])
            st.session_state.df = edited_df.copy()
            corrected_df = edited_df[edited_df['Correction']].copy()

            # Flip the label on rows marked for correction.
            edited_df['Result_Correction'] = edited_df.apply(lambda row:
                'HOAX' if (row['Result_Detection'] == 'NON-HOAX' and row['Correction']) else
                ('NON-HOAX' if (row['Result_Detection'] == 'HOAX' and row['Correction']) else row['Result_Detection']),
                axis=1
            )

            st.session_state.df = edited_df.copy()

            if not corrected_df.empty:
                # Same label-flip, applied to the corrected subset
                # (corrected_df was sliced before Result_Correction existed).
                corrected_df['Result_Correction'] = corrected_df.apply(lambda row:
                    'HOAX' if (row['Result_Detection'] == 'NON-HOAX' and row['Correction']) else
                    ('NON-HOAX' if (row['Result_Detection'] == 'HOAX' and row['Correction']) else row['Result_Detection']),
                    axis=1
                )

                # Add Timestamp only for saving
                corrected_df['Timestamp'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

                cols = ['Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource', 'Result_Detection', 'Result_Correction']
                corrected_df_to_display = corrected_df[cols]

                st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Data yang Dikoreksi</h6>", unsafe_allow_html=True)
                st.dataframe(corrected_df_to_display, use_container_width=True, hide_index=True)
            else:
                st.write("Tidak ada data yang dikoreksi.")

        # Persist the corrected rows to GCS.
        if st.button("Simpan", key="corrected_data"):
            if 'df' in st.session_state:
                # NOTE(review): raises KeyError if 'Correction' is missing
                # (i.e. Deteksi was never run on this DataFrame) — confirm.
                corrected_df = st.session_state.df[st.session_state.df['Correction']].copy()
                corrected_df['Timestamp'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                corrected_df = corrected_df.drop(columns=['Correction'])

                if not corrected_df.empty:
                    # Define GCS bucket and file name.
                    # NOTE(review): both values are ignored by
                    # save_corrections_to_gcs, which hard-codes its own names.
                    bucket_name = "your-bucket-name"
                    file_name = "corrected_upload_data.csv"

                    # Convert DataFrame to list of dicts for GCS
                    correction_data = corrected_df.to_dict(orient='records')

                    # Save corrected data to GCS
                    save_corrections_to_gcs(bucket_name, file_name, correction_data)

                    st.success("Data telah disimpan.")
                    st.session_state.corrected_df = corrected_df
                else:
                    st.warning("Tidak ada data yang dikoreksi untuk disimpan.")
            else:
                st.warning("Data deteksi tidak ditemukan.")
home.py ADDED
@@ -0,0 +1,285 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import plotly.express as px
4
+ from wordcloud import WordCloud, STOPWORDS
5
+ import matplotlib.pyplot as plt
6
+
7
# Caching data loading
@st.cache_data
def load_data():
    """Read the hoax dataset CSV; result is cached by Streamlit across reruns."""
    return pd.read_csv("mafindo_mix_llm.csv")
12
+
13
# Caching WordCloud generation
@st.cache_resource
def generate_wordcloud(text, colormap, stopwords):
    """Build a 500x200 word cloud for *text*; cached by Streamlit per arguments."""
    cloud = WordCloud(
        width=500,
        height=200,
        background_color='white',
        colormap=colormap,
        stopwords=stopwords,
    )
    # generate() fills the layout in place and returns the WordCloud itself.
    return cloud.generate(text)
18
+
19
def show_home():
    """Render the Home tab: dataset overview charts (label balance, sources,
    trends, topics), word cloud, classification/tone donuts, and two static
    HTML tables (model evaluation metrics and dataset split statistics)."""
    # Load the dataset (cached in load_data)
    df = load_data()

    # Convert 'Tanggal' to datetime and derive the year
    df['Tanggal'] = pd.to_datetime(df['Tanggal'], format='%d/%m/%Y')
    df['Year'] = df['Tanggal'].dt.year

    # Convert text columns to string to avoid type errors (e.g. NaN floats)
    df['Content'] = df['Content'].astype(str)

    # Define additional (Indonesian) stopwords for the word cloud
    additional_stopwords = {"dan", "di", "yang", "ke", "dari", "untuk", "pada", "adalah", "sebuah", "dengan", "tersebut", "ini", "itu", "atau", "dalam", "juga", "adalah", "yg", "tapi"}

    # Combine default stopwords with additional stopwords
    combined_stopwords = set(STOPWORDS).union(additional_stopwords)


    # Row with 4 visualizations
    col1, col2, col3, col4 = st.columns([1.5, 2.5, 1.5, 2.5])

    # Visualization 1: Bar chart for Hoax vs Non-Hoax using Plotly
    with col1:
        st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Hoax vs Non-Hoax</h6>", unsafe_allow_html=True)
        df_label_counts = df['Label'].value_counts().reset_index()
        df_label_counts.columns = ['Label', 'Jumlah']
        bar_chart_label = px.bar(df_label_counts, x='Label', y='Jumlah', color='Label',
                                 color_discrete_map={'HOAX': 'red', 'NON-HOAX': 'green'})
        bar_chart_label.update_layout(
            width=200, height=150, xaxis_title='Label', yaxis_title='Jumlah',
            xaxis_title_font_size=10, yaxis_title_font_size=10,
            xaxis_tickfont_size=8, yaxis_tickfont_size=8, margin=dict(t=10, b=10, l=10, r=10),
            showlegend=False
        )
        st.plotly_chart(bar_chart_label, use_container_width=False)

    # Visualization 2: Bar chart for Hoax vs Non-Hoax per Data Source using Plotly
    with col2:
        st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Hoax vs Non-Hoax per Data Source</h6>", unsafe_allow_html=True)
        datasource_label_counts = df.groupby(['Datasource', 'Label']).size().reset_index(name='counts')
        fig_datasource = px.bar(datasource_label_counts, x='Datasource', y='counts', color='Label', barmode='group',
                                color_discrete_map={'HOAX': 'red', 'NON-HOAX': 'green'})
        fig_datasource.update_layout(
            width=500, height=150, xaxis_title='Datasource', yaxis_title='Jumlah',
            xaxis_title_font_size=10, yaxis_title_font_size=10,
            xaxis_tickfont_size=6, yaxis_tickfont_size=8, xaxis_tickangle=0,
            margin=dict(t=10, b=10, l=10, r=50),
            legend=dict(
                font=dict(size=8),  # Smaller font size for the legend
                traceorder='normal',
                orientation='v',  # Vertical orientation of the legend
                title_text='Label',  # Title for the legend
                yanchor='top', y=1, xanchor='left', x=1.05,  # Adjust position of the legend
                bgcolor='rgba(255, 255, 255, 0)',  # Transparent background for legend
                bordercolor='rgba(0, 0, 0, 0)'  # No border color
            ),
            showlegend=True
        )
        st.plotly_chart(fig_datasource, use_container_width=False)

    # Visualization 3: Line chart for Hoax per Year using Plotly
    with col3:
        st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Hoax per Tahun</h6>", unsafe_allow_html=True)

        # Filter data to include only years up to 2023
        hoax_per_year = df[(df['Label'] == 'HOAX') & (df['Year'] <= 2023)].groupby('Year').size().reset_index(name='count')

        line_chart_hoax = px.line(hoax_per_year, x='Year', y='count', line_shape='linear',
                                  color_discrete_sequence=['red'])
        line_chart_hoax.update_layout(
            width=200, height=150, xaxis_title='Tahun', yaxis_title='Jumlah Hoax',
            xaxis_title_font_size=10, yaxis_title_font_size=10,
            xaxis_tickfont_size=8, yaxis_tickfont_size=8, margin=dict(t=10, b=10, l=10, r=10),
            showlegend=False
        )
        st.plotly_chart(line_chart_hoax, use_container_width=False)


    # Visualization 4: Bar chart for Topics per Year using Plotly
    with col4:
        st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Topik per Tahun</h6>", unsafe_allow_html=True)
        # NOTE(review): 'Tanggal' and 'Year' were already computed at the top
        # of this function — this reconversion is redundant.
        df['Tanggal'] = pd.to_datetime(df['Tanggal'], format='%d/%m/%Y')
        df['Year'] = df['Tanggal'].dt.year

        # Filter the data to include only years up to 2023
        df_mafindo_filtered = df[df['Year'] <= 2023]

        topics_per_year = df_mafindo_filtered.groupby(['Year', 'Topic']).size().reset_index(name='count')

        # Create the vertical bar chart (stacked by Topic)
        bar_chart_topics = px.bar(topics_per_year, x='Year', y='count', color='Topic',
                                  color_continuous_scale=px.colors.sequential.Viridis)

        # Update layout to adjust the legend
        bar_chart_topics.update_layout(
            width=600, height=150, xaxis_title='Tahun', yaxis_title='Jumlah Topik',
            xaxis_title_font_size=10, yaxis_title_font_size=10,
            xaxis_tickfont_size=8, yaxis_tickfont_size=8, margin=dict(t=10, b=10, l=10, r=10),
            showlegend=True,
            legend=dict(
                yanchor="top", y=1, xanchor="left", x=1.02,  # Adjust position of the legend
                bgcolor='rgba(255, 255, 255, 0)',  # Transparent background for legend
                bordercolor='rgba(0, 0, 0, 0)',  # No border color
                itemclick='toggleothers',  # Allow toggling of legend items
                itemsizing='constant',  # Consistent sizing for legend items
                font=dict(size=8),
                traceorder='normal',
                orientation='v',  # Vertical orientation of legend
                title_text='Topic'
            )
        )

        st.plotly_chart(bar_chart_topics, use_container_width=True)


    # Create a new row for WordCloud and donut visualizations
    col5, col6, col7 = st.columns([2, 2.5, 2.5])

    # Wordcloud for Hoax
    with col5:
        st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Wordcloud for Hoax</h6>", unsafe_allow_html=True)
        hoax_text = ' '.join(df[df['Label'] == 'HOAX']['Content'])
        wordcloud_hoax = generate_wordcloud(hoax_text, 'Reds', combined_stopwords)
        fig_hoax = plt.figure(figsize=(5, 2.5))
        plt.imshow(wordcloud_hoax, interpolation='bilinear')
        plt.axis('off')
        st.pyplot(fig_hoax)

    # Donut chart: distribution of the 'Classification' column
    with col6:
        st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Klasifikasi</h6>", unsafe_allow_html=True)
        df_classification_counts = df['Classification'].value_counts().reset_index()
        df_classification_counts.columns = ['Classification', 'Count']

        # Create the donut chart
        donut_chart_classification = px.pie(df_classification_counts, names='Classification', values='Count',
                                            hole=0.3, color_discrete_sequence=px.colors.qualitative.Set2)

        # Update layout to move the legend and adjust its size
        donut_chart_classification.update_layout(
            width=300, height=170,  # Adjust the size of the chart
            margin=dict(t=20, b=20, l=20, r=120),  # Adjust margins to make room for the legend
            legend=dict(
                yanchor="top", y=1, xanchor="left", x=1.07,  # Adjust position of the legend
                bgcolor='rgba(255, 255, 255, 0)',  # Transparent background for legend
                bordercolor='rgba(0, 0, 0, 0)',  # No border color
                itemclick='toggleothers',  # Allow toggling of legend items
                itemsizing='constant',  # Consistent sizing for legend items
                font=dict(size=8),  # Smaller font size for the legend
                traceorder='normal',
                orientation='v',  # Vertical legend
                title_text='Classification'  # Title for the legend
            )
        )
        st.plotly_chart(donut_chart_classification, use_container_width=True)

    # Donut chart: distribution of the 'Tone' column
    with col7:
        st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Tone</h6>", unsafe_allow_html=True)
        df_tone_counts = df['Tone'].value_counts().reset_index()
        df_tone_counts.columns = ['Tone', 'Count']

        # Create the donut chart
        donut_chart_tone = px.pie(df_tone_counts, names='Tone', values='Count',
                                  hole=0.3, color_discrete_sequence=px.colors.qualitative.Set2)

        # Update layout to move the legend and adjust its size
        donut_chart_tone.update_layout(
            width=250, height=170,  # Adjust the size of the chart
            margin=dict(t=20, b=20, l=20, r=100),  # Adjust margins to make room for the legend
            legend=dict(
                yanchor="top", y=1, xanchor="left", x=1.07,  # Adjust position of the legend
                bgcolor='rgba(255, 255, 255, 0)',  # Transparent background for legend
                bordercolor='rgba(0, 0, 0, 0)',  # No border color
                itemclick='toggleothers',  # Allow toggling of legend items
                itemsizing='constant',  # Consistent sizing for legend items
                font=dict(size=8),  # Smaller font size for the legend
                traceorder='normal',
                orientation='v',  # Vertical legend
                title_text='Tone'  # Title for the legend
            )
        )
        st.plotly_chart(donut_chart_tone, use_container_width=True)

    # Evaluation metrics table: one row per pre-trained model
    # (NON-HOAX P/R/F1, HOAX P/R/F1, accuracy).
    data = [
        ["indobenchmark/indobert-base-p2", 0.6898, 0.9793, 0.8094, 0.8400, 0.1981, 0.3206, 0.7023],
        ["cahya/bert-base-indonesian-522M", 0.7545, 0.8756, 0.8106, 0.6800, 0.4811, 0.5635, 0.7358],
        ["indolem/indobert-base-uncased", 0.7536, 0.8238, 0.7871, 0.6136, 0.5094, 0.5567, 0.7124],
        ["mdhugol/indonesia-bert-sentiment-classification", 0.7444, 0.8601, 0.7981, 0.6447, 0.4623, 0.5385, 0.7191]
    ]

    # Highlight the row with the best accuracy (last column)
    highest_accuracy = max(data, key=lambda x: x[-1])

    # Table header
    html_table = """
    <table style="width:100%; border-collapse: collapse; font-size: 12px;">
        <tr>
            <th rowspan="2" style="border: 1px solid black; padding: 5px; font-size: 14px; text-align: center;">Pre-trained Model</th>
            <th colspan="3" style="border: 1px solid black; padding: 5px; font-size: 14px; text-align: center;">NON-HOAX</th>
            <th colspan="3" style="border: 1px solid black; padding: 5px; font-size: 14px; text-align: center;">HOAX</th>
            <th rowspan="2" style="border: 1px solid black; padding: 5px; font-size: 14px; text-align: center;">Accuracy</th>
        </tr>
        <tr>
            <th style="border: 1px solid black; padding: 5px; font-size: 12px; width:80px; text-align: center;">Precision</th>
            <th style="border: 1px solid black; padding: 5px; font-size: 12px; width:80px; text-align: center;">Recall</th>
            <th style="border: 1px solid black; padding: 5px; font-size: 12px; width:80px; text-align: center;">F1-Score</th>
            <th style="border: 1px solid black; padding: 5px; font-size: 12px; width:80px; text-align: center;">Precision</th>
            <th style="border: 1px solid black; padding: 5px; font-size: 12px; width:80px; text-align: center;">Recall</th>
            <th style="border: 1px solid black; padding: 5px; font-size: 12px; width:80px; text-align: center;">F1-Score</th>
        </tr>
    """
    # Fill in the data rows (best-accuracy row gets a highlight color)
    for row in data:
        if row == highest_accuracy:
            html_table += "<tr style='background-color: #41B3A2; font-size: 12px;'>"
        else:
            html_table += "<tr style= ' font-size: 12px;'>"
        for item in row:
            html_table += f"<td style='border: 1px solid black; padding: 5px; font-size: 12px;'>{item}</td>"
        html_table += "</tr>"

    html_table += "</table>"
    # Render the metrics table in Streamlit
    col8 = st.columns([5])
    with col8[0]:
        st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Evaluation Metrics</h6>", unsafe_allow_html=True)
        st.markdown(html_table, unsafe_allow_html=True)

    # Static table: dataset split statistics (train/test/dev per label)
    html_table_col9 = """
    <div style='text-align: center;'>
        <table style="width: 100%; margin: -5px 0; font-size: 12px; border-collapse: collapse; border: 1px solid black;">
            <thead>
                <tr style="background-color: #e0e0e0;">
                    <th style="padding: 8px; border: 1px solid black; font-weight: bold;">Label</th>
                    <th style="padding: 8px; border: 1px solid black; font-weight: bold;">Train</th>
                    <th style="padding: 8px; border: 1px solid black; font-weight: bold;">Test</th>
                    <th style="padding: 8px; border: 1px solid black; font-weight: bold;">Dev</th>
                </tr>
            </thead>
            <tbody>
                <tr style="border-bottom: 1px solid black;">
                    <td style="padding: 8px; border: 1px solid black; text-align: center;">HOAX</td>
                    <td style="padding: 8px; border: 1px solid black; text-align: center;">11,563</td>
                    <td style="padding: 8px; border: 1px solid black; text-align: center;">193</td>
                    <td style="padding: 8px; border: 1px solid black; text-align: center;">193</td>
                </tr>
                <tr style="border-bottom: 1px solid black;">
                    <td style="padding: 8px; border: 1px solid black; text-align: center;">NON-HOAX</td>
                    <td style="padding: 8px; border: 1px solid black; text-align: center;">789</td>
                    <td style="padding: 8px; border: 1px solid black; text-align: center;">106</td>
                    <td style="padding: 8px; border: 1px solid black; text-align: center;">106</td>
                </tr>
                <tr style="font-weight: bold; border-top: 1px solid black;">
                    <td style="padding: 8px; border: 1px solid black; text-align: center;">TOTAL</td>
                    <td style="padding: 8px; border: 1px solid black; text-align: center;">12,352</td>
                    <td style="padding: 8px; border: 1px solid black; text-align: center;">299</td>
                    <td style="padding: 8px; border: 1px solid black; text-align: center;">299</td>
                </tr>
            </tbody>
        </table>
    </div>
    """

    # Display the statistics table using HTML
    col9 = st.columns([1])  # Adjust the number and width of columns as needed
    with col9[0]:
        st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Statistik Data</h6>", unsafe_allow_html=True)
        st.markdown(html_table_col9, unsafe_allow_html=True)
inbound-source-431806-g7-e49e388ce0be.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "type": "service_account",
3
+ "project_id": "inbound-source-431806-g7",
4
+ "private_key_id": "e49e388ce0bed9704aedad42a56d8e3982e0120f",
5
+ "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQC2eXeqNjR8Gaeb\npUekLAbieWiqvxGak71OFj4t1/fBemDduW1tsjN1biZzosJR3KERpIWQ0z0vbDwt\n3zOuvdf7XxEZ09Iopp3TRqk/qrrAQzanLAhkAJ8K8czNAlkaZ3KYW2j9bU+xk4P8\nFNpoHZwFZJczLujFxULxIU90KGqigXdvkdvyevfH1mxLlCuXL6F6bFsHuF8ckt2Q\nWKQ4bVnHW8w6CymhmJVgFxX68HxoTbObeoaRzkd5kjJvdJ+A4MQbdzLyHxlPxxZn\nme1LICIQKlsUrk6MSHzMcrl8BN0lMj8k6DgIO2WD/uCXRwemAJl3YoJc5BZN2Luz\nI0DTsDB1AgMBAAECggEANEVga6BicYhR1IrIlnVMNZUM0BiyvMKEkHlbr3s1zDU3\nyVwkRi+tgP6gQjDGFHgspaao4j84wDxzkrplDjHwzF/DwM/GXIG6JTsRIZ1RKOE4\nJzQ8ZRUueg6hGbsJ9j/a+lz5Gtu04Av/W3dHx1pwBrV1gKJ36KtkzTk7Du3C+jC6\n4fQeTim1ebrIkj3Hu88lV4cDMJENWDnHFXj0ww8SygaNDoT7X9E96iRwubMiE7AK\nI8JNJqFsO1S7nQPDbp4KceQwjVOWE1djTxtxYhcFAGOSacKVKcLz0mcQstXvUzns\nUfIj1+2l8dxRsHMRkg+bllD99aXJ2PrLKBp13ia90wKBgQDqKAVBtlATOFWtsdmG\nPJ6i1+SzzokuGoQO7UwweLtiGXMelr1rdmGTbtsg+/OQC2sFfDJtyKM2xhcXc07U\nxGQZGrPaGmVEAMDujayxgEyw46Dm8H9phGUekoAO8dsSRHynZ4KIGULtZZ/jasYp\nnHJOFVUeL9libv2hdyBC2zPWfwKBgQDHfzZdBKI9/OVo9S69CCoJ+lXs+n7H0/H5\n1wXLYcVfurVs4p+AGXA+F+bZJGFnrYWUwTS8DbB4cTISCURyanxd/IU22qfjp646\nJPTpMLefdqRf01x5jxOHt3NbWTwOWQL/jCoC10VaIeY0jAWRcpYpGj/lbNenyQB6\nQWO8GyeHCwKBgQC1EgOWoBvl8P9YVRqoEoJ93MNvQ/yS2VBblqb/KK2Gm7WI5vpN\nenrUHrp3FD5xmlLFKBh7CtcjySUcLj+8iq35N8vykczTPF31Wzs6+8LSWwQW8c0l\nVIs5jAJZDC/jPXDDp2iqRBacK6TroKrijKdbuGVc9ZV95+RcExmweX/pkwKBgQCI\nSyry5cWKIAsDZ+6kir1dz7+Ahaq0DuLUU8jLqGJWApMMbs+VjsuWQHIgi7BYSr5m\nYJEMoTWdM4iHtfkjSgjplSnVzhDBgb+QTctcvUHWGhI2vYoCKnOnVvfiwtY63ykj\nOblB85yX9Wz3HWp4chaQwjRBI9k58iL3Y1EmJE8e/QKBgF1HJKXaaXNogVHN/O5+\nh5YvWAQlWkmfL9sD89Gt1regkd+DM/Vfx+0yPuCgfopmOc72WO5gMQ6TlcH+MRQS\nPc2O7cHbit2IxsKfYYANOLjfhXAiIYC+yvArdzTwn53Wni+USnFH1YD1XUV8wTGj\nT0XsKoxnGUq4twTDK6re0oRl\n-----END PRIVATE KEY-----\n",
6
+ "client_email": "dashboardhoax-service-account@inbound-source-431806-g7.iam.gserviceaccount.com",
7
+ "client_id": "110233701696815226341",
8
+ "auth_uri": "https://accounts.google.com/o/oauth2/auth",
9
+ "token_uri": "https://oauth2.googleapis.com/token",
10
+ "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
11
+ "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/dashboardhoax-service-account%40inbound-source-431806-g7.iam.gserviceaccount.com",
12
+ "universe_domain": "googleapis.com"
13
+ }
load_model.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from transformers import BertTokenizer, BertForSequenceClassification
import streamlit as st

# Maps the display name shown in the UI to the Hugging Face Hub repo that
# actually hosts the fine-tuned checkpoint.
model_paths = {
    "cahya/bert-base-indonesian-522M": "Nakhwa/cahyabert",
    "indobenchmark/indobert-base-p2": "Nakhwa/indobenchmark",
    "indolem/indobert-base-uncased": "Nakhwa/indolem",
    "mdhugol/indonesia-bert-sentiment-classification": "Nakhwa/mdhugol",
}


# Cached across Streamlit reruns so each checkpoint is downloaded/loaded once.
@st.cache_resource
def load_model(model_name):
    """Load and cache the tokenizer/model pair for ``model_name``.

    Parameters
    ----------
    model_name : str
        One of the keys of ``model_paths``.

    Returns
    -------
    tuple
        ``(tokenizer, model)`` with the model switched to eval mode.

    Raises
    ------
    ValueError
        If ``model_name`` is not a known model.
    """
    try:
        path = model_paths[model_name]
    except KeyError:
        # Fail loudly with the valid choices instead of an opaque KeyError.
        raise ValueError(
            f"Unknown model name: {model_name!r}. "
            f"Expected one of: {sorted(model_paths)}"
        ) from None
    tokenizer = BertTokenizer.from_pretrained(path)
    model = BertForSequenceClassification.from_pretrained(path)
    model.eval()  # inference only: disable dropout etc.
    return tokenizer, model
mafindo_mix_llm.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cf40365bdcaf731eadc84f7c1622c75763d7277631749bed31adad6e90ff8e6
3
+ size 19922497
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit==1.37.1
2
+ pandas==2.2.2
3
+ plotly==5.13.0
4
+ wordcloud==1.9.3
5
+ matplotlib==3.9.2
6
+ lime==0.2.0.1
7
+ torch==2.3.1
8
+ numpy==1.26.4
9
+ transformers==4.41.2
10
+ streamlit-aggrid==1.0.5
11
+ scikit-learn==1.5.1
styles.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# styles.py
#
# Shared CSS injected into the Streamlit pages (via st.markdown with
# unsafe_allow_html=True) to tighten default widget spacing and apply the
# dashboard's button colors.
COMMON_CSS = """
<style>
    .stSelectbox div[data-baseweb="select"] {
        margin-top: -35px;
    }
    .stTextInput div[data-baseweb="input"] {
        margin-top: -35px;
    }
    .stTextArea div[data-baseweb="textarea"] {
        margin-top: -35px;
    }
    .stFileUploader div[data-baseweb="input"] {
        margin-top: -100px;
    }
    .stSelectbox {
        max-width: 300px;
    }
    .stTextInput, .stTextArea {
        max-width: 1400px;
    }
    .stSelectbox div, .stTextInput input, .stTextArea textarea {
        font-size: 14px;
    }
    .stButton > button {
        font-size: 6px;
        padding: 2px 8px;
        border-radius: 10px;
        background-color: #1560BD;
        color: white;
    }
    .stButton > button:hover {
        background-color: #1560BD;
        border: none;
        outline: none;
    }
    .stRadio div[data-baseweb="radio"] {
        font-size: 14px; /* Ensure font size for the entire radio button group */
        margin-top: -100px; /* Reduce margin between label and radio button */
    }
</style>
"""
test.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import torch
from torch.nn.functional import softmax
from load_model import load_model  # Import the load_model function
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import streamlit as st


@st.cache_resource
def get_model_and_tokenizer(model_name):
    """Return the cached ``(tokenizer, model)`` pair for ``model_name``.

    NOTE(review): ``load_model`` is itself ``@st.cache_resource``-cached, so
    this extra cache layer is redundant but harmless; kept as the module's
    single entry point for loading models.
    """
    return load_model(model_name)


# Default model backing the module-level prediction helpers below.
# Routed through get_model_and_tokenizer so every load takes the same path.
default_model_name = "cahya/bert-base-indonesian-522M"
tokenizer, model = get_model_and_tokenizer(default_model_name)
15
+
16
# Prediction function
def predict_hoax(title, content):
    """Classify a news article as ``'HOAX'`` or ``'NON-HOAX'``.

    Parameters
    ----------
    title : str
        Article headline.
    content : str
        Article body text.

    Returns
    -------
    str
        ``'HOAX'`` if the positive class (index 1) wins, else ``'NON-HOAX'``.

    Raises
    ------
    ValueError
        If the module-level tokenizer/model have not been loaded.
    """
    if tokenizer is None or model is None:
        raise ValueError("Model and tokenizer must be loaded before prediction.")

    # BERT-style sentence-pair encoding: headline and body joined by [SEP].
    text = f"{title} [SEP] {content}"
    inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=256)
    with torch.no_grad():  # inference only — no gradients needed
        outputs = model(**inputs)
    probs = softmax(outputs.logits, dim=1)
    pred = torch.argmax(probs, dim=1).item()
    # Label convention from training: class 1 == hoax.
    return 'HOAX' if pred == 1 else 'NON-HOAX'
32
+
33
+ # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
34
+ # model.to(device)
35
+
36
# LIME prediction function
def predict_proba_for_lime(texts):
    """Return a (n_texts, n_classes) probability matrix for LIME.

    Each perturbed text is scored independently with the module-level
    tokenizer/model; one row of class probabilities per input string.
    """
    rows = []
    for sample in texts:
        encoded = tokenizer(sample, return_tensors='pt', padding=True, truncation=True, max_length=256)
        with torch.no_grad():
            logits = model(**encoded).logits
        rows.append(softmax(logits, dim=1).cpu().numpy()[0])
    return np.array(rows)
46
+
47
def evaluate_model_performance(df, tokenizer, model):
    """Evaluate hoax classification on a labeled DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Title', 'Content', and 'Label' columns, with 'Label'
        holding the string 'HOAX' for positives.
    tokenizer, model :
        NOTE(review): currently unused — ``predict_hoax`` reads the
        module-level globals instead. Kept for interface compatibility;
        consider threading them through in a follow-up.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1)`` with HOAX as positive class.
    """
    true_labels = []
    pred_labels = []

    for _, row in df.iterrows():
        # Using 'Label' as ground truth (no 'Final_Result' column available).
        true_label = row['Label']
        pred_label = predict_hoax(row['Title'], row['Content'])

        true_labels.append(1 if true_label == 'HOAX' else 0)
        pred_labels.append(1 if pred_label == 'HOAX' else 0)

    accuracy = accuracy_score(true_labels, pred_labels)
    # zero_division=0 avoids warnings/NaN when a class is never predicted.
    precision = precision_score(true_labels, pred_labels, average='binary', zero_division=0)
    recall = recall_score(true_labels, pred_labels, average='binary', zero_division=0)
    f1 = f1_score(true_labels, pred_labels, average='binary', zero_division=0)

    return accuracy, precision, recall, f1