broadfield-dev committed on
Commit
175e3dd
·
verified ·
1 Parent(s): e1c75d3

Update processor.py

Browse files
Files changed (1) hide show
  1. processor.py +20 -10
processor.py CHANGED
@@ -79,7 +79,7 @@ class DatasetCommandCenter:
79
 
80
  def _sanitize_for_json(self, obj):
81
  """
82
- Ensures data is safe for JSON serialization to prevent UI crashes (NaN, NaT, Infinity).
83
  """
84
  if isinstance(obj, float):
85
  if math.isnan(obj) or math.isinf(obj):
@@ -133,6 +133,9 @@ class DatasetCommandCenter:
133
  for i, row in enumerate(ds_stream):
134
  if i >= 10: break
135
 
 
 
 
136
  # Clean row for UI
137
  clean_row = self._sanitize_for_json(row)
138
  sample_rows.append(clean_row)
@@ -179,8 +182,6 @@ class DatasetCommandCenter:
179
  def _get_value_by_path(self, obj, path):
180
  """
181
  Retrieves a value from the row.
182
- PRIORITY 1: Exact Key Match (Simplest, safest path).
183
- PRIORITY 2: Dot Notation Traversal (for nested JSON).
184
  """
185
  if not path: return obj
186
 
@@ -195,12 +196,17 @@ class DatasetCommandCenter:
195
  current = obj
196
 
197
  for i, key in enumerate(keys):
198
- # Access key with duck-typing support (works on dicts, UserDicts, etc)
199
  try:
200
- current = current[key]
 
 
 
 
201
  except:
202
- return None # Key not found
203
 
 
 
204
  # Lazy Parsing: Only parse string if we need to go deeper
205
  is_last_key = (i == len(keys) - 1)
206
  if not is_last_key and isinstance(current, str):
@@ -209,7 +215,7 @@ class DatasetCommandCenter:
209
  try:
210
  current = json.loads(s)
211
  except:
212
- return None # Broken JSON
213
 
214
  return current
215
 
@@ -230,6 +236,7 @@ class DatasetCommandCenter:
230
 
231
  matched_item = None
232
  for item in data:
 
233
  if str(item.get(filter_key, '')) == str(filter_val):
234
  matched_item = item
235
  break
@@ -242,7 +249,7 @@ class DatasetCommandCenter:
242
  def _apply_projection(self, row, recipe):
243
  new_row = {}
244
 
245
- # Eval Context
246
  eval_context = row.copy()
247
  eval_context['row'] = row
248
  eval_context['json'] = json
@@ -334,9 +341,10 @@ The following operations were applied to the source data:
334
  ds_stream = load_dataset(source_id, name=conf, split=split, streaming=True, token=self.token)
335
  count = 0
336
  for i, row in enumerate(ds_stream):
337
-
338
  if max_rows and count >= int(max_rows):
339
  break
 
 
340
  row = dict(row)
341
 
342
  # 1. Filter
@@ -391,8 +399,10 @@ The following operations were applied to the source data:
391
 
392
  for i, row in enumerate(ds_stream):
393
  if len(processed) >= 5: break
394
- row = dict(row)
395
 
 
 
 
396
  # Check Filter
397
  passed = True
398
  if recipe.get('filter_rule'):
 
79
 
80
  def _sanitize_for_json(self, obj):
81
  """
82
+ Recursively cleans data for JSON serialization.
83
  """
84
  if isinstance(obj, float):
85
  if math.isnan(obj) or math.isinf(obj):
 
133
  for i, row in enumerate(ds_stream):
134
  if i >= 10: break
135
 
136
+ # CRITICAL FIX: Force Materialization
137
+ row = dict(row)
138
+
139
  # Clean row for UI
140
  clean_row = self._sanitize_for_json(row)
141
  sample_rows.append(clean_row)
 
182
  def _get_value_by_path(self, obj, path):
183
  """
184
  Retrieves a value from the row.
 
 
185
  """
186
  if not path: return obj
187
 
 
196
  current = obj
197
 
198
  for i, key in enumerate(keys):
 
199
  try:
200
+ # Use get() if possible, or key access
201
+ if isinstance(current, dict):
202
+ current = current.get(key)
203
+ else:
204
+ return None
205
  except:
206
+ return None
207
 
208
+ if current is None: return None
209
+
210
  # Lazy Parsing: Only parse string if we need to go deeper
211
  is_last_key = (i == len(keys) - 1)
212
  if not is_last_key and isinstance(current, str):
 
215
  try:
216
  current = json.loads(s)
217
  except:
218
+ return None
219
 
220
  return current
221
 
 
236
 
237
  matched_item = None
238
  for item in data:
239
+ # String comparison for safety
240
  if str(item.get(filter_key, '')) == str(filter_val):
241
  matched_item = item
242
  break
 
249
  def _apply_projection(self, row, recipe):
250
  new_row = {}
251
 
252
+ # Eval Context (requires explicit dict)
253
  eval_context = row.copy()
254
  eval_context['row'] = row
255
  eval_context['json'] = json
 
341
  ds_stream = load_dataset(source_id, name=conf, split=split, streaming=True, token=self.token)
342
  count = 0
343
  for i, row in enumerate(ds_stream):
 
344
  if max_rows and count >= int(max_rows):
345
  break
346
+
347
+ # CRITICAL FIX: Force Materialization
348
  row = dict(row)
349
 
350
  # 1. Filter
 
399
 
400
  for i, row in enumerate(ds_stream):
401
  if len(processed) >= 5: break
 
402
 
403
+ # CRITICAL FIX: Force Materialization
404
+ row = dict(row)
405
+
406
  # Check Filter
407
  passed = True
408
  if recipe.get('filter_rule'):