Spaces:
Running
Running
Update deteksi_upload.py
Browse files- deteksi_upload.py +18 -32
deteksi_upload.py
CHANGED
@@ -10,7 +10,6 @@ import os
|
|
10 |
from datetime import datetime
|
11 |
import pytz
|
12 |
|
13 |
-
# Set environment variable for Google Cloud credentials using secrets
|
14 |
with open("credentials.json", "w") as f:
|
15 |
f.write(st.secrets["GOOGLE_APPLICATION_CREDENTIALS"])
|
16 |
|
@@ -20,21 +19,16 @@ def save_corrections_to_gcs(bucket_name, file_name, correction_data):
|
|
20 |
client = storage.Client()
|
21 |
bucket = client.bucket("dashboardhoax-bucket")
|
22 |
blob = bucket.blob("koreksi_pengguna_file.csv")
|
23 |
-
|
24 |
-
# Check if the blob (file) exists
|
25 |
if blob.exists():
|
26 |
-
# Download existing CSV from GCS
|
27 |
existing_data = blob.download_as_string().decode('utf-8')
|
28 |
existing_df = pd.read_csv(StringIO(existing_data))
|
29 |
else:
|
30 |
-
# Create a new DataFrame if the file does not exist
|
31 |
existing_df = pd.DataFrame(columns=['Timestamp', 'Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource', 'Result_Detection', 'Result_Correction'])
|
32 |
|
33 |
-
# Append the new data to the existing data
|
34 |
new_data_df = pd.DataFrame(correction_data)
|
35 |
updated_df = pd.concat([existing_df, new_data_df], ignore_index=True)
|
36 |
|
37 |
-
# Convert the DataFrame back to CSV and upload
|
38 |
updated_csv_data = updated_df.to_csv(index=False)
|
39 |
blob.upload_from_string(updated_csv_data, content_type='text/csv')
|
40 |
|
@@ -72,8 +66,8 @@ def show_deteksi_upload():
|
|
72 |
|
73 |
grid_options = GridOptionsBuilder.from_dataframe(df)
|
74 |
grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
|
|
|
75 |
gridOptions = grid_options.build()
|
76 |
-
gridOptions['defaultColDef'] = {'fontSize': 10}
|
77 |
|
78 |
AgGrid(
|
79 |
df,
|
@@ -91,17 +85,16 @@ def show_deteksi_upload():
|
|
91 |
st.error(f"Terjadi kesalahan saat deteksi: {e}")
|
92 |
|
93 |
if st.session_state.df is not None:
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
st.success(performance_text)
|
105 |
|
106 |
st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Hasil Deteksi</h6>", unsafe_allow_html=True)
|
107 |
|
@@ -111,6 +104,7 @@ def show_deteksi_upload():
|
|
111 |
grid_options = GridOptionsBuilder.from_dataframe(df_reordered)
|
112 |
grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
|
113 |
grid_options.configure_default_column(editable=True, groupable=True)
|
|
|
114 |
gridOptions = grid_options.build()
|
115 |
|
116 |
grid_response = AgGrid(
|
@@ -133,18 +127,14 @@ def show_deteksi_upload():
|
|
133 |
st.session_state.df = edited_df.copy()
|
134 |
|
135 |
if not corrected_df.empty:
|
136 |
-
|
137 |
-
|
138 |
-
('NON-HOAX' if (row['Result_Detection'] == 'HOAX' and row['Correction']) else row['Result_Detection']),
|
139 |
-
axis=1
|
140 |
-
)
|
141 |
|
142 |
-
#
|
143 |
wib = pytz.timezone('Asia/Jakarta')
|
144 |
corrected_df['Timestamp'] = datetime.now(wib).strftime('%Y-%m-%d %H:%M:%S')
|
145 |
|
146 |
-
|
147 |
-
corrected_df_to_display = corrected_df[cols]
|
148 |
|
149 |
st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Data yang Dikoreksi</h6>", unsafe_allow_html=True)
|
150 |
st.dataframe(corrected_df_to_display, use_container_width=True, hide_index=True)
|
@@ -154,20 +144,16 @@ def show_deteksi_upload():
|
|
154 |
if st.button("Simpan", key="corrected_data"):
|
155 |
if 'df' in st.session_state:
|
156 |
corrected_df = st.session_state.df[st.session_state.df['Correction']].copy()
|
157 |
-
|
158 |
wib = pytz.timezone('Asia/Jakarta')
|
159 |
corrected_df['Timestamp'] = datetime.now(wib).strftime('%Y-%m-%d %H:%M:%S')
|
160 |
corrected_df = corrected_df.drop(columns=['Correction'])
|
161 |
|
162 |
if not corrected_df.empty:
|
163 |
-
# Define GCS bucket and file name
|
164 |
bucket_name = "your-bucket-name"
|
165 |
file_name = "corrected_upload_data.csv"
|
166 |
|
167 |
-
# Convert DataFrame to list of dicts for GCS
|
168 |
correction_data = corrected_df.to_dict(orient='records')
|
169 |
|
170 |
-
# Save corrected data to GCS
|
171 |
save_corrections_to_gcs(bucket_name, file_name, correction_data)
|
172 |
|
173 |
st.success("Data telah disimpan.")
|
@@ -175,4 +161,4 @@ def show_deteksi_upload():
|
|
175 |
else:
|
176 |
st.warning("Tidak ada data yang dikoreksi untuk disimpan.")
|
177 |
else:
|
178 |
-
st.warning("Data deteksi tidak ditemukan.")
|
|
|
10 |
from datetime import datetime
|
11 |
import pytz
|
12 |
|
|
|
13 |
with open("credentials.json", "w") as f:
|
14 |
f.write(st.secrets["GOOGLE_APPLICATION_CREDENTIALS"])
|
15 |
|
|
|
19 |
client = storage.Client()
|
20 |
bucket = client.bucket("dashboardhoax-bucket")
|
21 |
blob = bucket.blob("koreksi_pengguna_file.csv")
|
22 |
+
|
|
|
23 |
if blob.exists():
|
|
|
24 |
existing_data = blob.download_as_string().decode('utf-8')
|
25 |
existing_df = pd.read_csv(StringIO(existing_data))
|
26 |
else:
|
|
|
27 |
existing_df = pd.DataFrame(columns=['Timestamp', 'Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource', 'Result_Detection', 'Result_Correction'])
|
28 |
|
|
|
29 |
new_data_df = pd.DataFrame(correction_data)
|
30 |
updated_df = pd.concat([existing_df, new_data_df], ignore_index=True)
|
31 |
|
|
|
32 |
updated_csv_data = updated_df.to_csv(index=False)
|
33 |
blob.upload_from_string(updated_csv_data, content_type='text/csv')
|
34 |
|
|
|
66 |
|
67 |
grid_options = GridOptionsBuilder.from_dataframe(df)
|
68 |
grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
|
69 |
+
grid_options.configure_default_column(cellStyle={'fontSize': '12px'})
|
70 |
gridOptions = grid_options.build()
|
|
|
71 |
|
72 |
AgGrid(
|
73 |
df,
|
|
|
85 |
st.error(f"Terjadi kesalahan saat deteksi: {e}")
|
86 |
|
87 |
if st.session_state.df is not None:
|
88 |
+
if 'Label' in st.session_state.df.columns:
|
89 |
+
accuracy, precision, recall, f1 = evaluate_model_performance(st.session_state.df, tokenizer, model)
|
90 |
+
performance_text = (
|
91 |
+
f"*Performansi Model*\n\n"
|
92 |
+
f"*Accuracy:* {round(accuracy, 2)} "
|
93 |
+
f"*Precision:* {round(precision, 2)} "
|
94 |
+
f"*Recall:* {round(recall, 2)} "
|
95 |
+
f"*F1 Score:* {round(f1, 2)}"
|
96 |
+
)
|
97 |
+
st.success(performance_text)
|
|
|
98 |
|
99 |
st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Hasil Deteksi</h6>", unsafe_allow_html=True)
|
100 |
|
|
|
104 |
grid_options = GridOptionsBuilder.from_dataframe(df_reordered)
|
105 |
grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
|
106 |
grid_options.configure_default_column(editable=True, groupable=True)
|
107 |
+
grid_options.configure_default_column(cellStyle={'fontSize': '12px'})
|
108 |
gridOptions = grid_options.build()
|
109 |
|
110 |
grid_response = AgGrid(
|
|
|
127 |
st.session_state.df = edited_df.copy()
|
128 |
|
129 |
if not corrected_df.empty:
|
130 |
+
expected_cols = ['Timestamp', 'Result_Detection', 'Result_Correction', 'Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource']
|
131 |
+
existing_cols = [col for col in expected_cols if col in corrected_df.columns]
|
|
|
|
|
|
|
132 |
|
133 |
+
# Tambahkan Timestamp hanya untuk penyimpanan
|
134 |
wib = pytz.timezone('Asia/Jakarta')
|
135 |
corrected_df['Timestamp'] = datetime.now(wib).strftime('%Y-%m-%d %H:%M:%S')
|
136 |
|
137 |
+
corrected_df_to_display = corrected_df[existing_cols]
|
|
|
138 |
|
139 |
st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Data yang Dikoreksi</h6>", unsafe_allow_html=True)
|
140 |
st.dataframe(corrected_df_to_display, use_container_width=True, hide_index=True)
|
|
|
144 |
if st.button("Simpan", key="corrected_data"):
|
145 |
if 'df' in st.session_state:
|
146 |
corrected_df = st.session_state.df[st.session_state.df['Correction']].copy()
|
|
|
147 |
wib = pytz.timezone('Asia/Jakarta')
|
148 |
corrected_df['Timestamp'] = datetime.now(wib).strftime('%Y-%m-%d %H:%M:%S')
|
149 |
corrected_df = corrected_df.drop(columns=['Correction'])
|
150 |
|
151 |
if not corrected_df.empty:
|
|
|
152 |
bucket_name = "your-bucket-name"
|
153 |
file_name = "corrected_upload_data.csv"
|
154 |
|
|
|
155 |
correction_data = corrected_df.to_dict(orient='records')
|
156 |
|
|
|
157 |
save_corrections_to_gcs(bucket_name, file_name, correction_data)
|
158 |
|
159 |
st.success("Data telah disimpan.")
|
|
|
161 |
else:
|
162 |
st.warning("Tidak ada data yang dikoreksi untuk disimpan.")
|
163 |
else:
|
164 |
+
st.warning("Data deteksi tidak ditemukan.")
|