Spaces:

srishtichugh
/

OpenEnv_hack

Running

OpenEnv_hack / server /tasks /task1_missing.py

add ui

40fcf49 1 day ago

1.53 kB

	"""
	Task 1 — Easy: Fill Missing Values
	Objective: Fill all NaN values in the employee records DataFrame.
	Score: 1.0 - (remaining_nulls / original_nulls), clamped to [0.01, 0.99]
	"""

	from server.data_generator import generate_task1_datasets

	# Numeric identifier of this task.
	TASK_ID = 1
	# NOTE(review): presumably the per-episode step cap enforced by the caller — confirm.
	MAX_STEPS = 20
	# Task brief text: states the goal and the one available operation
	# (fill_missing) with its strategy/value params and an example action.
	# The strategy alternatives are separated by a plain "|"; the previous "\|"
	# was an invalid escape sequence that leaked a backslash into the text.
	DESCRIPTION = (
	    "Task 1 (Easy) — Fill Missing Values\n"
	    "You have an employee records dataset with missing values (NaN) in "
	    "'age', 'salary', and 'department' columns. "
	    "Your goal is to fill all missing values so the dataset is complete.\n\n"
	    "Available operation: fill_missing\n"
	    " params.strategy: 'median' | 'mean' | 'mode' | 'constant'\n"
	    " params.value: (required when strategy='constant') the fill value\n"
	    "Example action: {\"operation\": \"fill_missing\", \"column\": \"age\", \"params\": {\"strategy\": \"median\"}}"
	)

	# Generate the datasets once, at import time, and keep them as module-level
	# templates; the total null count of the dirty frame is computed up front.
	# NOTE(review): the original note says seed=42 makes the generator output
	# identical every time — the seed is not visible here, confirm in
	# server.data_generator.
	_DIRTY_TEMPLATE, _CLEAN_DF = generate_task1_datasets()
	_ORIGINAL_NULLS = int(_DIRTY_TEMPLATE.isna().sum().sum())


	def load():
	    """Return ``(dirty_df, clean_df, original_null_count)`` from the cached datasets.

	    Both frames are handed out as copies, so a caller that mutates either
	    one cannot alter the module-level templates reused by later calls.
	    The null count is the value computed once from the dirty template.
	    """
	    return _DIRTY_TEMPLATE.copy(), _CLEAN_DF.copy(), _ORIGINAL_NULLS


	def score(current_df, original_nulls: int) -> float:
	    """Return the fraction of nulls filled, clamped to [0.01, 0.99].

	    The raw value is ``1.0 - remaining / original_nulls``, where
	    ``remaining`` is the number of null cells still in ``current_df``.
	    It is clamped and then rounded to 4 decimal places. When
	    ``original_nulls`` is 0 the result is 0.99 without inspecting the frame.
	    """
	    if original_nulls != 0:
	        nulls_left = int(current_df.isnull().sum().sum())
	        filled_fraction = 1.0 - nulls_left / original_nulls
	        capped = min(0.99, filled_fraction)
	        floored = max(0.01, capped)
	        return round(floored, 4)
	    # Nothing was missing to begin with: report the top of the range.
	    return 0.99


	def count_errors(current_df) -> int:
	    """Return the total number of null cells in ``current_df``."""
	    nulls_per_column = current_df.isnull().sum()
	    return int(nulls_per_column.sum())