Dusit-P committed on
Commit
a48b7ac
·
verified ·
1 Parent(s): 61a9d0b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -18
app.py CHANGED
@@ -1,9 +1,9 @@
1
  # app.py — Thai Sentiment (WangchanBERTa Variants)
2
  # - No Single tab
3
  # - No aspect analysis (focus on POS/NEG)
4
- # - CSV tab: date pickers appear ONLY if a date column exists
5
  # - Predict buttons right below inputs
6
- import os, json, importlib.util, traceback, re, math, tempfile
7
  import gradio as gr
8
  import torch, pandas as pd
9
  import torch.nn.functional as F
@@ -86,6 +86,19 @@ def _format_pct(x: float) -> str:
86
  def _to_datetime_safe(s):
87
  return pd.to_datetime(s, errors="coerce", infer_datetime_format=True, utc=False)
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  LIKELY_TEXT_COLS = ["text","review","message","comment","content","sentence","body","ข้อความ","รีวิว"]
90
  LIKELY_DATE_COLS = ["date","created_at","time","timestamp","datetime","วันที่","วันเวลา","เวลา"]
91
 
@@ -107,11 +120,9 @@ def detect_text_and_date_cols(df: pd.DataFrame):
107
  if c.lower() in LIKELY_DATE_COLS:
108
  date_candidates.append(c)
109
  continue
110
- # try parse sample
111
  sample = df[c].head(50)
112
  if _to_datetime_safe(sample).notna().sum() >= max(3, int(len(sample)*0.2)):
113
  date_candidates.append(c)
114
-
115
  date_candidates = list(dict.fromkeys(date_candidates))
116
  date_col = date_candidates[0] if len(date_candidates)>0 else None
117
  return text_col, date_candidates, date_col
@@ -223,7 +234,6 @@ def on_file_change(file_obj):
223
  - toggle visibility ของ date controls + line chart placeholder
224
  """
225
  if file_obj is None:
226
- # reset UI
227
  return (
228
  gr.update(choices=[], value=None), # text_dd
229
  gr.update(choices=[], value=None), # date_dd
@@ -240,7 +250,6 @@ def on_file_change(file_obj):
240
  cols = list(df_raw.columns)
241
  text_col, date_candidates, date_col = detect_text_and_date_cols(df_raw)
242
 
243
- # show/hide date controls
244
  has_date = date_col is not None
245
  note = "Detected text column: **{}**".format(text_col)
246
  if has_date:
@@ -273,7 +282,7 @@ def on_file_change(file_obj):
273
 
274
  # ================= CSV Predict =================
275
  def predict_csv(file_obj, model_choice: str, text_col_name: str,
276
- date_col_name: str, date_from: str, date_to: str,
277
  freq_choice: str, use_ma: bool):
278
 
279
  try:
@@ -281,8 +290,8 @@ def predict_csv(file_obj, model_choice: str, text_col_name: str,
281
  return pd.DataFrame(), go.Figure(), go.Figure(), gr.update(visible=False, value=go.Figure()), "Please upload a CSV.", None
282
 
283
  df_raw = pd.read_csv(file_obj.name)
284
-
285
  cols = list(df_raw.columns)
 
286
  col_text = text_col_name if text_col_name in cols else detect_text_and_date_cols(df_raw)[0]
287
 
288
  texts = [_norm_text(v) for v in df_raw[col_text].tolist()]
@@ -306,11 +315,16 @@ def predict_csv(file_obj, model_choice: str, text_col_name: str,
306
  df_time = out_df.copy()
307
  df_time["__dt__"] = dts
308
  df_time = df_time.dropna(subset=["__dt__"])
309
- # filter range if chosen
310
- if date_from:
311
- df_time = df_time[df_time["__dt__"] >= pd.to_datetime(date_from)]
312
- if date_to:
313
- df_time = df_time[df_time["__dt__"] <= pd.to_datetime(date_to)]
 
 
 
 
 
314
  if len(df_time) > 0:
315
  fig_line = make_time_chart(df_time, "__dt__", freq_choice, use_ma)
316
  show_time = True
@@ -350,7 +364,6 @@ with gr.Blocks(title="Thai Sentiment (WangchanBERTa Variants)") as demo:
350
  bar2 = gr.Plot(label="Label counts (bar)")
351
  pie2 = gr.Plot(label="Positive vs Negative (pie)")
352
  sum2 = gr.Markdown()
353
-
354
  btn_batch.click(predict_many, [t2, model_radio], [df2, bar2, pie2, sum2])
355
 
356
  # ---- CSV Upload ----
@@ -360,8 +373,9 @@ with gr.Blocks(title="Thai Sentiment (WangchanBERTa Variants)") as demo:
360
  text_dd = gr.Dropdown(label="คอลัมน์ข้อความ", choices=[], value=None)
361
  date_dd = gr.Dropdown(label="คอลัมน์วันเวลา (ถ้ามี)", choices=[], value=None)
362
  with gr.Row():
363
- date_from = gr.Date(label="เริ่มวันที่", visible=False)
364
- date_to = gr.Date(label="ถึงวันที่", visible=False)
 
365
  freq = gr.Radio(choices=["D","W","M"], value="D", label="ความถี่ (Day/Week/Month)", visible=False)
366
  use_ma = gr.Checkbox(value=True, label="Moving average (7/4/3)", visible=False)
367
 
@@ -375,13 +389,11 @@ with gr.Blocks(title="Thai Sentiment (WangchanBERTa Variants)") as demo:
375
  sum3 = gr.Markdown()
376
  dl3 = gr.File(label="ดาวน์โหลดผลเป็น CSV", interactive=False)
377
 
378
- # เมื่ออัปโหลดไฟล์ → เติม dropdowns + toggle date controls + เคลียร์ผลลัพธ์เก่า
379
  file_in.change(
380
  on_file_change, [file_in],
381
  [text_dd, date_dd, date_from, date_to, freq, use_ma, line, note_detect]
382
  )
383
 
384
- # ปุ่ม predict CSV อยู่ใต้ตัวกรอง (ใกล้มือ)
385
  btn_csv.click(
386
  predict_csv,
387
  [file_in, model_radio, text_dd, date_dd, date_from, date_to, freq, use_ma],
 
1
  # app.py — Thai Sentiment (WangchanBERTa Variants)
2
  # - No Single tab
3
  # - No aspect analysis (focus on POS/NEG)
4
+ # - CSV tab: date pickers appear ONLY if a date column exists (use DatePicker)
5
  # - Predict buttons right below inputs
6
+ import os, json, importlib.util, traceback, re, math, tempfile, datetime
7
  import gradio as gr
8
  import torch, pandas as pd
9
  import torch.nn.functional as F
 
86
  def _to_datetime_safe(s):
87
  return pd.to_datetime(s, errors="coerce", infer_datetime_format=True, utc=False)
88
 
89
+ def _normalize_datepicker_value(v):
90
+ """รับค่าจาก gr.DatePicker (datetime.date หรือ str หรือ None) → pandas.Timestamp หรือ None"""
91
+ if v is None or (isinstance(v, float) and math.isnan(v)):
92
+ return None
93
+ if isinstance(v, datetime.date):
94
+ return pd.Timestamp(v)
95
+ # เผื่อบางเวอร์ชันส่ง str 'YYYY-MM-DD'
96
+ try:
97
+ ts = pd.to_datetime(v, errors="coerce")
98
+ return ts if pd.notna(ts) else None
99
+ except Exception:
100
+ return None
101
+
102
  LIKELY_TEXT_COLS = ["text","review","message","comment","content","sentence","body","ข้อความ","รีวิว"]
103
  LIKELY_DATE_COLS = ["date","created_at","time","timestamp","datetime","วันที่","วันเวลา","เวลา"]
104
 
 
120
  if c.lower() in LIKELY_DATE_COLS:
121
  date_candidates.append(c)
122
  continue
 
123
  sample = df[c].head(50)
124
  if _to_datetime_safe(sample).notna().sum() >= max(3, int(len(sample)*0.2)):
125
  date_candidates.append(c)
 
126
  date_candidates = list(dict.fromkeys(date_candidates))
127
  date_col = date_candidates[0] if len(date_candidates)>0 else None
128
  return text_col, date_candidates, date_col
 
234
  - toggle visibility ของ date controls + line chart placeholder
235
  """
236
  if file_obj is None:
 
237
  return (
238
  gr.update(choices=[], value=None), # text_dd
239
  gr.update(choices=[], value=None), # date_dd
 
250
  cols = list(df_raw.columns)
251
  text_col, date_candidates, date_col = detect_text_and_date_cols(df_raw)
252
 
 
253
  has_date = date_col is not None
254
  note = "Detected text column: **{}**".format(text_col)
255
  if has_date:
 
282
 
283
  # ================= CSV Predict =================
284
  def predict_csv(file_obj, model_choice: str, text_col_name: str,
285
+ date_col_name: str, date_from, date_to,
286
  freq_choice: str, use_ma: bool):
287
 
288
  try:
 
290
  return pd.DataFrame(), go.Figure(), go.Figure(), gr.update(visible=False, value=go.Figure()), "Please upload a CSV.", None
291
 
292
  df_raw = pd.read_csv(file_obj.name)
 
293
  cols = list(df_raw.columns)
294
+
295
  col_text = text_col_name if text_col_name in cols else detect_text_and_date_cols(df_raw)[0]
296
 
297
  texts = [_norm_text(v) for v in df_raw[col_text].tolist()]
 
315
  df_time = out_df.copy()
316
  df_time["__dt__"] = dts
317
  df_time = df_time.dropna(subset=["__dt__"])
318
+
319
+ # normalize datepicker values
320
+ start_ts = _normalize_datepicker_value(date_from)
321
+ end_ts = _normalize_datepicker_value(date_to)
322
+
323
+ if start_ts is not None:
324
+ df_time = df_time[df_time["__dt__"] >= start_ts]
325
+ if end_ts is not None:
326
+ df_time = df_time[df_time["__dt__"] <= end_ts]
327
+
328
  if len(df_time) > 0:
329
  fig_line = make_time_chart(df_time, "__dt__", freq_choice, use_ma)
330
  show_time = True
 
364
  bar2 = gr.Plot(label="Label counts (bar)")
365
  pie2 = gr.Plot(label="Positive vs Negative (pie)")
366
  sum2 = gr.Markdown()
 
367
  btn_batch.click(predict_many, [t2, model_radio], [df2, bar2, pie2, sum2])
368
 
369
  # ---- CSV Upload ----
 
373
  text_dd = gr.Dropdown(label="คอลัมน์ข้อความ", choices=[], value=None)
374
  date_dd = gr.Dropdown(label="คอลัมน์วันเวลา (ถ้ามี)", choices=[], value=None)
375
  with gr.Row():
376
+ # ใช้ DatePicker แทน Date (รองรับ gradio เวอร์ชันที่ไม่เคยมี gr.Date)
377
+ date_from = gr.DatePicker(label="เริ่มวันที่", visible=False)
378
+ date_to = gr.DatePicker(label="ถึงวันที่", visible=False)
379
  freq = gr.Radio(choices=["D","W","M"], value="D", label="ความถี่ (Day/Week/Month)", visible=False)
380
  use_ma = gr.Checkbox(value=True, label="Moving average (7/4/3)", visible=False)
381
 
 
389
  sum3 = gr.Markdown()
390
  dl3 = gr.File(label="ดาวน์โหลดผลเป็น CSV", interactive=False)
391
 
 
392
  file_in.change(
393
  on_file_change, [file_in],
394
  [text_dd, date_dd, date_from, date_to, freq, use_ma, line, note_detect]
395
  )
396
 
 
397
  btn_csv.click(
398
  predict_csv,
399
  [file_in, model_radio, text_dd, date_dd, date_from, date_to, freq, use_ma],