Nakhwa committed on
Commit
fc8b513
1 Parent(s): 02ccda8

Update deteksi_upload.py

Browse files
Files changed (1) hide show
  1. deteksi_upload.py +18 -32
deteksi_upload.py CHANGED
@@ -10,7 +10,6 @@ import os
10
  from datetime import datetime
11
  import pytz
12
 
13
- # Set environment variable for Google Cloud credentials using secrets
14
  with open("credentials.json", "w") as f:
15
  f.write(st.secrets["GOOGLE_APPLICATION_CREDENTIALS"])
16
 
@@ -20,21 +19,16 @@ def save_corrections_to_gcs(bucket_name, file_name, correction_data):
20
  client = storage.Client()
21
  bucket = client.bucket("dashboardhoax-bucket")
22
  blob = bucket.blob("koreksi_pengguna_file.csv")
23
-
24
- # Check if the blob (file) exists
25
  if blob.exists():
26
- # Download existing CSV from GCS
27
  existing_data = blob.download_as_string().decode('utf-8')
28
  existing_df = pd.read_csv(StringIO(existing_data))
29
  else:
30
- # Create a new DataFrame if the file does not exist
31
  existing_df = pd.DataFrame(columns=['Timestamp', 'Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource', 'Result_Detection', 'Result_Correction'])
32
 
33
- # Append the new data to the existing data
34
  new_data_df = pd.DataFrame(correction_data)
35
  updated_df = pd.concat([existing_df, new_data_df], ignore_index=True)
36
 
37
- # Convert the DataFrame back to CSV and upload
38
  updated_csv_data = updated_df.to_csv(index=False)
39
  blob.upload_from_string(updated_csv_data, content_type='text/csv')
40
 
@@ -72,8 +66,8 @@ def show_deteksi_upload():
72
 
73
  grid_options = GridOptionsBuilder.from_dataframe(df)
74
  grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
 
75
  gridOptions = grid_options.build()
76
- gridOptions['defaultColDef'] = {'fontSize': 10}
77
 
78
  AgGrid(
79
  df,
@@ -91,17 +85,16 @@ def show_deteksi_upload():
91
  st.error(f"Terjadi kesalahan saat deteksi: {e}")
92
 
93
  if st.session_state.df is not None:
94
-
95
- accuracy, precision, recall, f1 = evaluate_model_performance(st.session_state.df, tokenizer, model)
96
- performance_text = (
97
- f"*Performansi Model*\n\n"
98
- f"*Accuracy:* {round(accuracy, 2)}  "
99
- f"*Precision:* {round(precision, 2)}  "
100
- f"*Recall:* {round(recall, 2)}  "
101
- f"*F1 Score:* {round(f1, 2)}"
102
- )
103
-
104
- st.success(performance_text)
105
 
106
  st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Hasil Deteksi</h6>", unsafe_allow_html=True)
107
 
@@ -111,6 +104,7 @@ def show_deteksi_upload():
111
  grid_options = GridOptionsBuilder.from_dataframe(df_reordered)
112
  grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
113
  grid_options.configure_default_column(editable=True, groupable=True)
 
114
  gridOptions = grid_options.build()
115
 
116
  grid_response = AgGrid(
@@ -133,18 +127,14 @@ def show_deteksi_upload():
133
  st.session_state.df = edited_df.copy()
134
 
135
  if not corrected_df.empty:
136
- corrected_df['Result_Correction'] = corrected_df.apply(lambda row:
137
- 'HOAX' if (row['Result_Detection'] == 'NON-HOAX' and row['Correction']) else
138
- ('NON-HOAX' if (row['Result_Detection'] == 'HOAX' and row['Correction']) else row['Result_Detection']),
139
- axis=1
140
- )
141
 
142
- # Add Timestamp only for saving
143
  wib = pytz.timezone('Asia/Jakarta')
144
  corrected_df['Timestamp'] = datetime.now(wib).strftime('%Y-%m-%d %H:%M:%S')
145
 
146
- cols = ['Timestamp', 'Result_Correction', 'Result_Detection', 'Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource']
147
- corrected_df_to_display = corrected_df[cols]
148
 
149
  st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Data yang Dikoreksi</h6>", unsafe_allow_html=True)
150
  st.dataframe(corrected_df_to_display, use_container_width=True, hide_index=True)
@@ -154,20 +144,16 @@ def show_deteksi_upload():
154
  if st.button("Simpan", key="corrected_data"):
155
  if 'df' in st.session_state:
156
  corrected_df = st.session_state.df[st.session_state.df['Correction']].copy()
157
-
158
  wib = pytz.timezone('Asia/Jakarta')
159
  corrected_df['Timestamp'] = datetime.now(wib).strftime('%Y-%m-%d %H:%M:%S')
160
  corrected_df = corrected_df.drop(columns=['Correction'])
161
 
162
  if not corrected_df.empty:
163
- # Define GCS bucket and file name
164
  bucket_name = "your-bucket-name"
165
  file_name = "corrected_upload_data.csv"
166
 
167
- # Convert DataFrame to list of dicts for GCS
168
  correction_data = corrected_df.to_dict(orient='records')
169
 
170
- # Save corrected data to GCS
171
  save_corrections_to_gcs(bucket_name, file_name, correction_data)
172
 
173
  st.success("Data telah disimpan.")
@@ -175,4 +161,4 @@ def show_deteksi_upload():
175
  else:
176
  st.warning("Tidak ada data yang dikoreksi untuk disimpan.")
177
  else:
178
- st.warning("Data deteksi tidak ditemukan.")
 
10
  from datetime import datetime
11
  import pytz
12
 
 
13
  with open("credentials.json", "w") as f:
14
  f.write(st.secrets["GOOGLE_APPLICATION_CREDENTIALS"])
15
 
 
19
  client = storage.Client()
20
  bucket = client.bucket("dashboardhoax-bucket")
21
  blob = bucket.blob("koreksi_pengguna_file.csv")
22
+
 
23
  if blob.exists():
 
24
  existing_data = blob.download_as_string().decode('utf-8')
25
  existing_df = pd.read_csv(StringIO(existing_data))
26
  else:
 
27
  existing_df = pd.DataFrame(columns=['Timestamp', 'Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource', 'Result_Detection', 'Result_Correction'])
28
 
 
29
  new_data_df = pd.DataFrame(correction_data)
30
  updated_df = pd.concat([existing_df, new_data_df], ignore_index=True)
31
 
 
32
  updated_csv_data = updated_df.to_csv(index=False)
33
  blob.upload_from_string(updated_csv_data, content_type='text/csv')
34
 
 
66
 
67
  grid_options = GridOptionsBuilder.from_dataframe(df)
68
  grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
69
+ grid_options.configure_default_column(cellStyle={'fontSize': '12px'})
70
  gridOptions = grid_options.build()
 
71
 
72
  AgGrid(
73
  df,
 
85
  st.error(f"Terjadi kesalahan saat deteksi: {e}")
86
 
87
  if st.session_state.df is not None:
88
+ if 'Label' in st.session_state.df.columns:
89
+ accuracy, precision, recall, f1 = evaluate_model_performance(st.session_state.df, tokenizer, model)
90
+ performance_text = (
91
+ f"*Performansi Model*\n\n"
92
+ f"*Accuracy:* {round(accuracy, 2)}&nbsp;&nbsp;"
93
+ f"*Precision:* {round(precision, 2)}&nbsp;&nbsp;"
94
+ f"*Recall:* {round(recall, 2)}&nbsp;&nbsp;"
95
+ f"*F1 Score:* {round(f1, 2)}"
96
+ )
97
+ st.success(performance_text)
 
98
 
99
  st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Hasil Deteksi</h6>", unsafe_allow_html=True)
100
 
 
104
  grid_options = GridOptionsBuilder.from_dataframe(df_reordered)
105
  grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
106
  grid_options.configure_default_column(editable=True, groupable=True)
107
+ grid_options.configure_default_column(cellStyle={'fontSize': '12px'})
108
  gridOptions = grid_options.build()
109
 
110
  grid_response = AgGrid(
 
127
  st.session_state.df = edited_df.copy()
128
 
129
  if not corrected_df.empty:
130
+ expected_cols = ['Timestamp', 'Result_Detection', 'Result_Correction', 'Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource']
131
+ existing_cols = [col for col in expected_cols if col in corrected_df.columns]
 
 
 
132
 
133
+ # Tambahkan Timestamp hanya untuk penyimpanan
134
  wib = pytz.timezone('Asia/Jakarta')
135
  corrected_df['Timestamp'] = datetime.now(wib).strftime('%Y-%m-%d %H:%M:%S')
136
 
137
+ corrected_df_to_display = corrected_df[existing_cols]
 
138
 
139
  st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Data yang Dikoreksi</h6>", unsafe_allow_html=True)
140
  st.dataframe(corrected_df_to_display, use_container_width=True, hide_index=True)
 
144
  if st.button("Simpan", key="corrected_data"):
145
  if 'df' in st.session_state:
146
  corrected_df = st.session_state.df[st.session_state.df['Correction']].copy()
 
147
  wib = pytz.timezone('Asia/Jakarta')
148
  corrected_df['Timestamp'] = datetime.now(wib).strftime('%Y-%m-%d %H:%M:%S')
149
  corrected_df = corrected_df.drop(columns=['Correction'])
150
 
151
  if not corrected_df.empty:
 
152
  bucket_name = "your-bucket-name"
153
  file_name = "corrected_upload_data.csv"
154
 
 
155
  correction_data = corrected_df.to_dict(orient='records')
156
 
 
157
  save_corrections_to_gcs(bucket_name, file_name, correction_data)
158
 
159
  st.success("Data telah disimpan.")
 
161
  else:
162
  st.warning("Tidak ada data yang dikoreksi untuk disimpan.")
163
  else:
164
+ st.warning("Data deteksi tidak ditemukan.")