davanstrien HF staff committed on
Commit
f9987db
1 Parent(s): 4aeaa3b

Add language_threshold_percent parameter to predict_language and pass it through to predict_rows

Browse files
Files changed (1) hide show
  1. main.py +6 -1
main.py CHANGED
@@ -251,6 +251,7 @@ async def predict_language(
251
  int, Query(title="Max number of requests to datasets server", gt=0, le=30)
252
  ] = 10,
253
  number_of_rows: int = 1000,
 
254
  ) -> dict[Any, Any] | None:
255
  is_valid = datasets_server_valid_rows(hub_id)
256
  if not is_valid:
@@ -289,7 +290,11 @@ async def predict_language(
289
  split,
290
  )
291
  logger.info(f"Predicting language for {len(random_rows)} rows")
292
- predictions = predict_rows(random_rows, target_column)
 
 
 
 
293
  predictions["hub_id"] = hub_id
294
  predictions["config"] = config
295
  predictions["split"] = split
 
251
  int, Query(title="Max number of requests to datasets server", gt=0, le=30)
252
  ] = 10,
253
  number_of_rows: int = 1000,
254
+ language_threshold_percent: float = 0.2,
255
  ) -> dict[Any, Any] | None:
256
  is_valid = datasets_server_valid_rows(hub_id)
257
  if not is_valid:
 
290
  split,
291
  )
292
  logger.info(f"Predicting language for {len(random_rows)} rows")
293
+ predictions = predict_rows(
294
+ random_rows,
295
+ target_column,
296
+ language_threshold_percent=language_threshold_percent,
297
+ )
298
  predictions["hub_id"] = hub_id
299
  predictions["config"] = config
300
  predictions["split"] = split