Yacine Jernite committed on
Commit
9e65b63
·
1 Parent(s): bc0c2e4

simplified

Browse files
Files changed (5) hide show
  1. app.py +56 -116
  2. ui/tab_dataset.py +6 -21
  3. utils/constants.py +2 -35
  4. utils/dataset.py +2 -6
  5. utils/helpers.py +24 -22
app.py CHANGED
@@ -17,6 +17,7 @@ from utils.dataset import (
17
  )
18
  from utils.helpers import (
19
  check_token_availability,
 
20
  format_token_status,
21
  get_inference_token,
22
  get_org_token,
@@ -39,6 +40,31 @@ from ui.tab_testing import (
39
  # Handlers
40
  # ============================================================================
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  def handle_run_test(test_input, current_policy, model_choice, reasoning_effort, max_tokens, temperature, top_p, system_prompt_val, response_format_val, save_mode, oauth_token: gr.OAuthToken | None = None):
43
  """Handle test execution."""
44
 
@@ -81,57 +107,25 @@ def handle_run_test(test_input, current_policy, model_choice, reasoning_effort,
81
  org_token = get_org_token()
82
  if org_token:
83
  try:
84
- categories_and_reasoning_text = format_categories_and_reasoning(parsed)
85
- policy_violation = parsed.get("label", -1)
86
-
87
- data = {
88
- "input": test_input,
89
- "policy_violation": policy_violation,
90
- "categories_and_reasoning": categories_and_reasoning_text,
91
- "policy": current_policy,
92
- "model_selection": model_choice,
93
- "raw_response": raw_response,
94
- "reasoning_trace": reasoning or "",
95
- "reasoning_effort": reasoning_effort or "",
96
- "max_tokens": int(max_tokens),
97
- "temperature": float(temperature),
98
- "top_p": float(top_p),
99
- "system_prompt": system_prompt_val or "",
100
- "response_format": response_format_val or "",
101
- "timestamp": datetime.now().isoformat(),
102
- }
103
- repo_id = get_roost_dataset_repo_id()
104
- save_to_dataset(repo_id, org_token, data)
105
  except Exception as e:
106
- # Log error but don't break test execution
107
  print(f"Failed to save to ROOST dataset: {e}")
108
  elif save_mode == "Save to Private Dataset":
109
  personal_token, _ = get_personal_token(oauth_token)
110
  if personal_token:
111
  try:
112
- categories_and_reasoning_text = format_categories_and_reasoning(parsed)
113
- policy_violation = parsed.get("label", -1)
114
-
115
- data = {
116
- "input": test_input,
117
- "policy_violation": policy_violation,
118
- "categories_and_reasoning": categories_and_reasoning_text,
119
- "policy": current_policy,
120
- "model_selection": model_choice,
121
- "raw_response": raw_response,
122
- "reasoning_trace": reasoning or "",
123
- "reasoning_effort": reasoning_effort or "",
124
- "max_tokens": int(max_tokens),
125
- "temperature": float(temperature),
126
- "top_p": float(top_p),
127
- "system_prompt": system_prompt_val or "",
128
- "response_format": response_format_val or "",
129
- "timestamp": datetime.now().isoformat(),
130
- }
131
- repo_id = get_dataset_repo_id(personal_token)
132
- save_to_dataset(repo_id, personal_token, data)
133
  except Exception as e:
134
- # Log error but don't break test execution
135
  print(f"Failed to save to private dataset: {e}")
136
 
137
  return (
@@ -237,55 +231,19 @@ with gr.Blocks(title="Moderation Model Testing") as demo:
237
  outputs=model_info_display,
238
  )
239
 
240
- # Token status update handler
241
- def update_token_status(oauth_token: gr.OAuthToken | None = None):
242
- """Update token status markdown when OAuth changes."""
243
- return format_token_status(oauth_token)
244
-
245
- # Save mode help text update handler
246
- def update_save_mode_help(oauth_token: gr.OAuthToken | None = None):
247
- """Update save mode help text based on token availability."""
248
- from ui.tab_testing import format_save_mode_help
249
- has_personal, has_org = check_token_availability(oauth_token)
250
- return format_save_mode_help(has_personal, has_org)
251
-
252
- # Dataset button state update handler
253
- def update_dataset_button_states(oauth_token: gr.OAuthToken | None = None):
254
- """Update dataset button states based on token availability."""
255
- has_personal, has_org = check_token_availability(oauth_token)
256
-
257
- # Update help text
258
- help_text = (
259
- f"*Private Dataset: {'✅ Available' if has_personal else '❌ Requires personal token (OAuth login or .env)'}*\n"
260
- f"*ROOST Dataset: {'✅ Available' if has_org else '⚠️ Can load if public, requires org token to save'}*"
261
- )
262
-
263
- return (
264
- gr.update(interactive=has_personal), # refresh_private_btn
265
- gr.update(interactive=True), # refresh_roost_btn (can load if public)
266
- help_text, # dataset_help_text
267
- )
268
-
269
  # Combined handler for login button click - updates all token-dependent UI
270
  def handle_login_click(oauth_token: gr.OAuthToken | None = None):
271
  """Handle login button click and update all token-dependent UI."""
272
- token_status = format_token_status(oauth_token)
273
-
274
  from ui.tab_testing import format_save_mode_help
275
- has_personal, has_org = check_token_availability(oauth_token)
276
- save_help = format_save_mode_help(has_personal, has_org)
277
 
278
- dataset_help = (
279
- f"*Private Dataset: {'✅ Available' if has_personal else '❌ Requires personal token (OAuth login or .env)'}*\n"
280
- f"*ROOST Dataset: {'✅ Available' if has_org else '⚠️ Can load if public, requires org token to save'}*"
281
- )
282
 
283
  return (
284
- token_status, # token_status_markdown
285
- save_help, # save_mode_help
286
  gr.update(interactive=has_personal), # refresh_private_btn
287
  gr.update(interactive=True), # refresh_roost_btn
288
- dataset_help, # dataset_help_text
289
  )
290
 
291
  login_button.click(
@@ -303,66 +261,48 @@ with gr.Blocks(title="Moderation Model Testing") as demo:
303
  # Dataset load handler
304
  def load_example_from_dataset(selected_label, cached_examples_list, dropdown_choices_list):
305
  """Load example from dataset and populate all fields."""
306
- if (not cached_examples_list or not selected_label or
307
- not dropdown_choices_list or selected_label not in dropdown_choices_list):
308
- # Return None to skip updates
309
- return None, None, None, None, None, None, None, None, None, None, None, None, None, None, None
310
 
311
  try:
312
- # Find index by matching label
313
  idx = dropdown_choices_list.index(selected_label)
314
- if idx < 0 or idx >= len(cached_examples_list):
315
- return None, None, None, None, None, None, None, None, None, None, None, None, None, None, None
316
 
317
  example = cached_examples_list[idx]
318
-
319
- # Get policy - ensure it's a string (not None)
320
  policy = example.get("policy", "") or ""
321
-
322
- # Extract saved results
323
  policy_violation = example.get("policy_violation", -1)
324
- categories_and_reasoning = example.get("categories_and_reasoning", "")
325
- raw_response = example.get("raw_response", "")
326
- reasoning_trace = example.get("reasoning_trace", "")
327
  model_selection = example.get("model_selection", "")
328
  reasoning_effort_val = example.get("reasoning_effort", "")
 
329
 
330
  # Format label text
331
- if policy_violation == 1:
332
- label_text = "## Policy Violation Detected"
333
- elif policy_violation == 0:
334
- label_text = "## ✅ No Policy Violation"
335
- else:
336
- label_text = "## ⚠️ Unable to determine label"
337
-
338
- # Format model info
339
- model_info = format_model_info(model_selection, reasoning_effort_val)
340
 
341
- # Format reasoning info
342
  reasoning_info_text, reasoning_info_visible = format_reasoning_info(model_selection, reasoning_trace)
343
-
344
  reasoning_visible = bool(reasoning_trace and reasoning_trace.strip())
345
 
346
  return (
347
  example.get("input", ""),
348
- policy, # current_policy_state - UI syncs automatically via change handler
349
  example.get("model_selection", ""),
350
- example.get("reasoning_effort", ""),
351
  example.get("max_tokens", 0),
352
  example.get("temperature", 0.0),
353
  example.get("top_p", 0.0),
354
  example.get("system_prompt", ""),
355
  example.get("response_format", ""),
356
- # Results
357
- model_info,
358
  label_text,
359
- categories_and_reasoning,
360
- raw_response,
361
  gr.update(value=reasoning_info_text, visible=reasoning_info_visible),
362
  gr.update(value=reasoning_trace or "", visible=reasoning_visible),
363
  )
364
  except (ValueError, IndexError):
365
- return None, None, None, None, None, None, None, None, None, None, None, None, None, None, None
366
 
367
  example_dropdown.change(
368
  load_example_from_dataset,
 
17
  )
18
  from utils.helpers import (
19
  check_token_availability,
20
+ format_dataset_help_text,
21
  format_token_status,
22
  get_inference_token,
23
  get_org_token,
 
40
  # Handlers
41
  # ============================================================================
42
 
43
+ def prepare_save_data(test_input, current_policy, parsed, model_choice, raw_response,
44
+ reasoning, reasoning_effort, max_tokens, temperature, top_p,
45
+ system_prompt_val, response_format_val):
46
+ """Prepare data dict for saving to dataset."""
47
+ categories_and_reasoning_text = format_categories_and_reasoning(parsed)
48
+ policy_violation = parsed.get("label", -1)
49
+
50
+ return {
51
+ "input": test_input,
52
+ "policy_violation": policy_violation,
53
+ "categories_and_reasoning": categories_and_reasoning_text,
54
+ "policy": current_policy,
55
+ "model_selection": model_choice,
56
+ "raw_response": raw_response,
57
+ "reasoning_trace": reasoning or "",
58
+ "reasoning_effort": reasoning_effort or "",
59
+ "max_tokens": int(max_tokens),
60
+ "temperature": float(temperature),
61
+ "top_p": float(top_p),
62
+ "system_prompt": system_prompt_val or "",
63
+ "response_format": response_format_val or "",
64
+ "timestamp": datetime.now().isoformat(),
65
+ }
66
+
67
+
68
  def handle_run_test(test_input, current_policy, model_choice, reasoning_effort, max_tokens, temperature, top_p, system_prompt_val, response_format_val, save_mode, oauth_token: gr.OAuthToken | None = None):
69
  """Handle test execution."""
70
 
 
107
  org_token = get_org_token()
108
  if org_token:
109
  try:
110
+ data = prepare_save_data(
111
+ test_input, current_policy, parsed, model_choice, raw_response,
112
+ reasoning, reasoning_effort, max_tokens, temperature, top_p,
113
+ system_prompt_val, response_format_val
114
+ )
115
+ save_to_dataset(get_roost_dataset_repo_id(), org_token, data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  except Exception as e:
 
117
  print(f"Failed to save to ROOST dataset: {e}")
118
  elif save_mode == "Save to Private Dataset":
119
  personal_token, _ = get_personal_token(oauth_token)
120
  if personal_token:
121
  try:
122
+ data = prepare_save_data(
123
+ test_input, current_policy, parsed, model_choice, raw_response,
124
+ reasoning, reasoning_effort, max_tokens, temperature, top_p,
125
+ system_prompt_val, response_format_val
126
+ )
127
+ save_to_dataset(get_dataset_repo_id(personal_token), personal_token, data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  except Exception as e:
 
129
  print(f"Failed to save to private dataset: {e}")
130
 
131
  return (
 
231
  outputs=model_info_display,
232
  )
233
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  # Combined handler for login button click - updates all token-dependent UI
235
  def handle_login_click(oauth_token: gr.OAuthToken | None = None):
236
  """Handle login button click and update all token-dependent UI."""
 
 
237
  from ui.tab_testing import format_save_mode_help
 
 
238
 
239
+ has_personal, has_org = check_token_availability(oauth_token)
 
 
 
240
 
241
  return (
242
+ format_token_status(oauth_token), # token_status_markdown
243
+ format_save_mode_help(has_personal, has_org), # save_mode_help
244
  gr.update(interactive=has_personal), # refresh_private_btn
245
  gr.update(interactive=True), # refresh_roost_btn
246
+ format_dataset_help_text(has_personal, has_org), # dataset_help_text
247
  )
248
 
249
  login_button.click(
 
261
  # Dataset load handler
262
  def load_example_from_dataset(selected_label, cached_examples_list, dropdown_choices_list):
263
  """Load example from dataset and populate all fields."""
264
+ if not (cached_examples_list and selected_label and dropdown_choices_list and
265
+ selected_label in dropdown_choices_list):
266
+ return [None] * 15
 
267
 
268
  try:
 
269
  idx = dropdown_choices_list.index(selected_label)
270
+ if not (0 <= idx < len(cached_examples_list)):
271
+ return [None] * 15
272
 
273
  example = cached_examples_list[idx]
 
 
274
  policy = example.get("policy", "") or ""
 
 
275
  policy_violation = example.get("policy_violation", -1)
 
 
 
276
  model_selection = example.get("model_selection", "")
277
  reasoning_effort_val = example.get("reasoning_effort", "")
278
+ reasoning_trace = example.get("reasoning_trace", "")
279
 
280
  # Format label text
281
+ emoji = "❌" if policy_violation == 1 else "✅" if policy_violation == 0 else "⚠️"
282
+ label_text = f"## {emoji} {'Policy Violation Detected' if policy_violation == 1 else 'No Policy Violation' if policy_violation == 0 else 'Unable to determine label'}"
 
 
 
 
 
 
 
283
 
 
284
  reasoning_info_text, reasoning_info_visible = format_reasoning_info(model_selection, reasoning_trace)
 
285
  reasoning_visible = bool(reasoning_trace and reasoning_trace.strip())
286
 
287
  return (
288
  example.get("input", ""),
289
+ policy,
290
  example.get("model_selection", ""),
291
+ reasoning_effort_val,
292
  example.get("max_tokens", 0),
293
  example.get("temperature", 0.0),
294
  example.get("top_p", 0.0),
295
  example.get("system_prompt", ""),
296
  example.get("response_format", ""),
297
+ format_model_info(model_selection, reasoning_effort_val),
 
298
  label_text,
299
+ example.get("categories_and_reasoning", ""),
300
+ example.get("raw_response", ""),
301
  gr.update(value=reasoning_info_text, visible=reasoning_info_visible),
302
  gr.update(value=reasoning_trace or "", visible=reasoning_visible),
303
  )
304
  except (ValueError, IndexError):
305
+ return [None] * 15
306
 
307
  example_dropdown.change(
308
  load_example_from_dataset,
ui/tab_dataset.py CHANGED
@@ -8,7 +8,7 @@ import gradio as gr
8
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
9
 
10
  from utils.dataset import get_dataset_repo_id, get_roost_dataset_repo_id, load_dataset_examples
11
- from utils.helpers import check_token_availability, get_org_token, get_personal_token
12
  from utils.model_interface import extract_model_id, get_model_info
13
 
14
 
@@ -26,12 +26,8 @@ def format_preview_markdown(example: dict) -> str:
26
  model_name = model_info.get("name", model_id) if model_info else model_id or "Unknown"
27
 
28
  # Format label with emoji
29
- if policy_violation == 1:
30
- label_text = " Policy Violation Detected"
31
- elif policy_violation == 0:
32
- label_text = "✅ No Policy Violation"
33
- else:
34
- label_text = "⚠️ Unable to determine label"
35
 
36
  # Truncate policy preview
37
  policy_preview = policy # [:512] + "..." if len(policy) > 512 else policy
@@ -84,10 +80,7 @@ def build_dataset_tab() -> dict:
84
 
85
  # Help text explaining token requirements
86
  dataset_help_text = gr.Markdown(
87
- value=(
88
- f"*Private Dataset: {'✅ Available' if has_personal else '❌ Requires personal token (OAuth login or .env)'}*\n"
89
- f"*ROOST Dataset: {'✅ Available' if has_org else '⚠️ Can load if public, requires org token to save'}*"
90
- ),
91
  visible=True
92
  )
93
 
@@ -145,22 +138,14 @@ def build_dataset_tab() -> dict:
145
 
146
  return "*Select an example to preview*"
147
 
148
- def refresh_private(oauth_token: gr.OAuthToken | None = None):
149
- """Refresh private dataset."""
150
- return refresh_dataset("private", oauth_token)
151
-
152
- def refresh_roost(oauth_token: gr.OAuthToken | None = None):
153
- """Refresh ROOST dataset."""
154
- return refresh_dataset("roost", oauth_token)
155
-
156
  refresh_private_btn.click(
157
- refresh_private,
158
  inputs=None, # OAuth token auto-injected
159
  outputs=[example_dropdown, preview_markdown, cached_examples, dropdown_choices_state]
160
  )
161
 
162
  refresh_roost_btn.click(
163
- refresh_roost,
164
  inputs=None, # OAuth token auto-injected
165
  outputs=[example_dropdown, preview_markdown, cached_examples, dropdown_choices_state]
166
  )
 
8
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
9
 
10
  from utils.dataset import get_dataset_repo_id, get_roost_dataset_repo_id, load_dataset_examples
11
+ from utils.helpers import check_token_availability, format_dataset_help_text, get_label_emoji, get_org_token, get_personal_token
12
  from utils.model_interface import extract_model_id, get_model_info
13
 
14
 
 
26
  model_name = model_info.get("name", model_id) if model_info else model_id or "Unknown"
27
 
28
  # Format label with emoji
29
+ emoji = get_label_emoji(policy_violation)
30
+ label_text = f"{emoji} Policy Violation Detected" if policy_violation == 1 else f"{emoji} No Policy Violation" if policy_violation == 0 else f"{emoji} Unable to determine label"
 
 
 
 
31
 
32
  # Truncate policy preview
33
  policy_preview = policy # [:512] + "..." if len(policy) > 512 else policy
 
80
 
81
  # Help text explaining token requirements
82
  dataset_help_text = gr.Markdown(
83
+ value=format_dataset_help_text(has_personal, has_org),
 
 
 
84
  visible=True
85
  )
86
 
 
138
 
139
  return "*Select an example to preview*"
140
 
 
 
 
 
 
 
 
 
141
  refresh_private_btn.click(
142
+ lambda oauth_token=None: refresh_dataset("private", oauth_token),
143
  inputs=None, # OAuth token auto-injected
144
  outputs=[example_dropdown, preview_markdown, cached_examples, dropdown_choices_state]
145
  )
146
 
147
  refresh_roost_btn.click(
148
+ lambda oauth_token=None: refresh_dataset("roost", oauth_token),
149
  inputs=None, # OAuth token auto-injected
150
  outputs=[example_dropdown, preview_markdown, cached_examples, dropdown_choices_state]
151
  )
utils/constants.py CHANGED
@@ -46,39 +46,6 @@ MODELS = [
46
  },
47
  ]
48
 
49
- UNUSED_MODELS = [
50
- {
51
- "name": "Qwen3-4B-Thinking",
52
- "id": "Qwen/Qwen3-4B-Thinking-2507",
53
- "is_thinking": True,
54
- "supports_reasoning_level": False,
55
- },
56
- {
57
- "name": "Qwen3-4B-Instruct",
58
- "id": "Qwen/Qwen3-4B-Instruct-2507",
59
- "is_thinking": False,
60
- "supports_reasoning_level": False,
61
- },
62
- {
63
- "name": "Qwen3-30B-Instruct",
64
- "id": "Qwen/Qwen3-30B-A3B-Instruct-2507",
65
- "is_thinking": False,
66
- "supports_reasoning_level": False,
67
- },
68
- {
69
- "name": "GPT-OSS-20B",
70
- "id": "openai/gpt-oss-20b",
71
- "is_thinking": True,
72
- "supports_reasoning_level": True,
73
- },
74
- {
75
- "name": "GPT-OSS-120B",
76
- "id": "openai/gpt-oss-120b",
77
- "is_thinking": True,
78
- "supports_reasoning_level": True,
79
- },
80
- ]
81
-
82
  # Reasoning effort levels for GPT-OSS
83
  REASONING_EFFORTS = ["Low", "Medium", "High"]
84
 
@@ -105,10 +72,10 @@ RESPONSE_FORMAT = """## Response Format
105
 
106
  Given a content item, please provide a JSON-formatted response with the following fields:
107
  - `label`: INT - 1 for a policy violation, 0 for no violation
108
- - `categories`: LIST[DCT] - a list of categories from the disallowed and allowed categories that the content item falls into along with the reasoning for each category. Each dict has the following keys:
109
  - `category`: STR - the category from the policy
110
  - `reasoning`: STR - the reasoning for why the content item falls into this category
111
- - `policy_source`: STR - specific text from the policy that best supports the reasoning. Use [...] to concatenate multi-part citations. Make sure to quote the policy text exactly and include all relevant passages.
112
  """
113
 
114
  # Test examples from notebook
 
46
  },
47
  ]
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  # Reasoning effort levels for GPT-OSS
50
  REASONING_EFFORTS = ["Low", "Medium", "High"]
51
 
 
72
 
73
  Given a content item, please provide a JSON-formatted response with the following fields:
74
  - `label`: INT - 1 for a policy violation, 0 for no violation
75
+ - `categories`: LIST[DCT] - a list of categories (at least one) from the disallowed and allowed categories that the content item falls into along with the reasoning for each category. Each dict has the following keys:
76
  - `category`: STR - the category from the policy
77
  - `reasoning`: STR - the reasoning for why the content item falls into this category
78
+ - `policy_source`: STR - specific text from the policy that best supports the reasoning. Use [...] to concatenate multi-part citations. Prioritize policy explanatory text over repeating the category name. Make sure to quote the policy text exactly and include all relevant passages.
79
  """
80
 
81
  # Test examples from notebook
utils/dataset.py CHANGED
@@ -6,6 +6,7 @@ from datasets import Dataset, load_dataset
6
  from huggingface_hub import HfApi
7
 
8
  from utils.model_interface import extract_model_id, get_model_info
 
9
 
10
 
11
  def get_username_from_token(token: str | None) -> str:
@@ -175,12 +176,7 @@ def load_dataset_examples(repo_id: str, token: str | None) -> tuple[list[dict],
175
  policy_violation = example.get("policy_violation", -1)
176
 
177
  # Get label emoji
178
- if policy_violation == 1:
179
- label_emoji = "❌"
180
- elif policy_violation == 0:
181
- label_emoji = "✅"
182
- else:
183
- label_emoji = "⚠️"
184
 
185
  # Extract model name
186
  model_id = extract_model_id(model_selection)
 
6
  from huggingface_hub import HfApi
7
 
8
  from utils.model_interface import extract_model_id, get_model_info
9
+ from utils.helpers import get_label_emoji
10
 
11
 
12
  def get_username_from_token(token: str | None) -> str:
 
176
  policy_violation = example.get("policy_violation", -1)
177
 
178
  # Get label emoji
179
+ label_emoji = get_label_emoji(policy_violation)
 
 
 
 
 
180
 
181
  # Extract model name
182
  model_id = extract_model_id(model_selection)
utils/helpers.py CHANGED
@@ -6,6 +6,25 @@ import gradio as gr
6
  from dotenv import load_dotenv
7
 
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  def get_personal_token(oauth_token: gr.OAuthToken | None) -> tuple[str | None, str]:
10
  """
11
  Get personal Hugging Face token from OAuth or .env fallback.
@@ -20,30 +39,18 @@ def get_personal_token(oauth_token: gr.OAuthToken | None) -> tuple[str | None, s
20
  - hf_token: Token string if available, None otherwise
21
  - status_message: Warning message if using local .env, empty string otherwise
22
  """
23
- print(f"DEBUG: get_personal_token called with oauth_token type: {type(oauth_token)}")
24
-
25
  if oauth_token is None or (isinstance(oauth_token, str) and oauth_token == "Log in to Hugging Face"):
26
  # Try loading from .env file
27
- print("DEBUG: oauth_token is None, loading from .env")
28
- load_dotenv()
29
- hf_token = os.getenv("HF_TOKEN_MLSOC")
30
  if hf_token is None:
31
- print("DEBUG: HF_TOKEN_MLSOC not found in .env")
32
  return None, ""
33
- else:
34
- print(f"DEBUG: Loaded token from .env, length: {len(hf_token)}, first 4 chars: {hf_token[:4] if len(hf_token) >= 4 else hf_token}")
35
- return hf_token, "\n⚠️ Using local .env file for token (not online)"
36
  else:
37
  # OAuthToken object
38
- print(f"DEBUG: oauth_token is OAuthToken object")
39
  token = oauth_token.token
40
- print(f"DEBUG: Extracted token from OAuthToken, length: {len(token) if token else 0}, first 4 chars: {token[:4] if token and len(token) >= 4 else (token if token else 'None')}")
41
  if not token or not token.strip():
42
- print("DEBUG: OAuthToken.token is empty, falling back to .env")
43
- load_dotenv()
44
- hf_token = os.getenv("HF_TOKEN_MLSOC")
45
  if hf_token:
46
- print(f"DEBUG: Loaded token from .env (empty OAuth case), length: {len(hf_token)}, first 4 chars: {hf_token[:4] if len(hf_token) >= 4 else hf_token}")
47
  return hf_token, "\n⚠️ Using local .env file for token (not online)"
48
  return None, ""
49
  return token, ""
@@ -64,12 +71,7 @@ def get_org_token() -> str | None:
64
  return org_token
65
 
66
  # Fall back to .env file
67
- load_dotenv()
68
- org_token = os.getenv("ROOST_TOKEN_FALLBACK")
69
- if org_token:
70
- return org_token
71
-
72
- return None
73
 
74
 
75
  def get_inference_token(oauth_token: gr.OAuthToken | None) -> tuple[str | None, str]:
@@ -111,7 +113,7 @@ def format_token_status(oauth_token: gr.OAuthToken | None) -> str:
111
  has_personal, has_org = check_token_availability(oauth_token)
112
 
113
  lines = [
114
- "You can log in to yout Hugging Face account to save your work in a private dataset and use the app for inference after the end of the hackathon.",
115
  "### Token Status",
116
  ]
117
 
 
6
  from dotenv import load_dotenv
7
 
8
 
9
+ def _load_token_from_env(env_var: str) -> str | None:
10
+ """Load token from .env file."""
11
+ load_dotenv()
12
+ return os.getenv(env_var)
13
+
14
+
15
+ def get_label_emoji(policy_violation: int) -> str:
16
+ """Get emoji for policy violation label."""
17
+ return "❌" if policy_violation == 1 else "✅" if policy_violation == 0 else "⚠️"
18
+
19
+
20
+ def format_dataset_help_text(has_personal: bool, has_org: bool) -> str:
21
+ """Format help text explaining dataset availability."""
22
+ return (
23
+ f"*Private Dataset: {'✅ Available' if has_personal else '❌ Requires personal token (OAuth login or .env)'}*\n"
24
+ f"*ROOST Dataset: {'✅ Available' if has_org else '⚠️ Can load if public, requires org token to save'}*"
25
+ )
26
+
27
+
28
  def get_personal_token(oauth_token: gr.OAuthToken | None) -> tuple[str | None, str]:
29
  """
30
  Get personal Hugging Face token from OAuth or .env fallback.
 
39
  - hf_token: Token string if available, None otherwise
40
  - status_message: Warning message if using local .env, empty string otherwise
41
  """
 
 
42
  if oauth_token is None or (isinstance(oauth_token, str) and oauth_token == "Log in to Hugging Face"):
43
  # Try loading from .env file
44
+ hf_token = _load_token_from_env("HF_TOKEN_MLSOC")
 
 
45
  if hf_token is None:
 
46
  return None, ""
47
+ return hf_token, "\n⚠️ Using local .env file for token (not online)"
 
 
48
  else:
49
  # OAuthToken object
 
50
  token = oauth_token.token
 
51
  if not token or not token.strip():
52
+ hf_token = _load_token_from_env("HF_TOKEN_MLSOC")
 
 
53
  if hf_token:
 
54
  return hf_token, "\n⚠️ Using local .env file for token (not online)"
55
  return None, ""
56
  return token, ""
 
71
  return org_token
72
 
73
  # Fall back to .env file
74
+ return _load_token_from_env("ROOST_TOKEN_FALLBACK")
 
 
 
 
 
75
 
76
 
77
  def get_inference_token(oauth_token: gr.OAuthToken | None) -> tuple[str | None, str]:
 
113
  has_personal, has_org = check_token_availability(oauth_token)
114
 
115
  lines = [
116
+ "You can log in to your Hugging Face account to save your work in a private dataset and use the app for inference after the end of the hackathon.",
117
  "### Token Status",
118
  ]
119