Spaces:

dev-jas
/

polymer-aging-ml

Running

devjas1

(SYNC): bring parity backend (utils/ scripts/ models/ tests/) from feat/ui-parity-rebuild; no UI changes

6373c5a 3 days ago

1.59 kB

	#!/usr/bin/env python3
	"""
	audit.py - quick audit tool for preprocessing baseline

	Searches for relevant keywords in the ml-polymer-recycling repo
	to confirm what preprocessing steps (resample, baseline, smooth,
	normalize, etc.) are actually implemented in code/docs.
	"""

	import re
	from pathlib import Path

	# ||== KEYWORDS TO TRACE ==||
	# Substrings searched for (case-insensitively) on every scanned line.
	# Each entry needs its own trailing comma: adjacent string literals are
	# silently concatenated by Python, which previously fused "minmax" and
	# "TARGET_LENGTH" into one keyword that could never match.
	KEYWORDS = [
	    "resample", "baseline", "smooth", "Savitz",
	    "normalize", "minmax", "TARGET_LENGTH", "WINDOW_LENGTH",
	    "POLYORDER", "DEGREE", "input_length", "target_len",
	    "Figure2CNN", "ResNet",
	]

	# ||==== DIRECTORIES/FILES TO SCAN ====||
	# Paths are relative to the directory the script is run from. Entries
	# ending in "/" are expected to be directories and are searched recursively.
	TARGETS = [
	    "scripts/preprocess_dataset.py",
	    # NOTE(review): was "scripts/run_inferece.py" (typo) -- confirm the
	    # corrected name matches the file in the repo.
	    "scripts/run_inference.py",
	    "models/",
	    "utils/",
	    "README.md",
	    "GROUND_TRUTH_PIPELINE.md",
	    "docs/",
	]

	# ||==== COMPILE REGEX FOR KEYWORDS ====||
	# Join with a bare "|" so the regex is an alternation of the keywords
	# (an escaped pipe would only match a literal "|" character). re.escape
	# keeps each keyword literal even if one later contains a metacharacter.
	pattern = re.compile(
	    "|".join(re.escape(word) for word in KEYWORDS),
	    re.IGNORECASE,
	)

	def scan_file(path: Path):
	    """Print every line of *path* that matches the module-level ``pattern``.

	    Each hit is written to stdout as ``<path>:<line number>: <stripped line>``,
	    with line numbers starting at 1. The file is decoded as UTF-8 and
	    undecodable bytes are dropped rather than raising.

	    Any exception raised while opening, reading or printing is caught and
	    reported on stdout as an ``[ERR]`` line, so one bad file does not stop
	    the rest of the scan.
	    """
	    try:
	        with path.open(encoding="utf-8", errors="ignore") as handle:
	            for lineno, text in enumerate(handle, start=1):
	                if pattern.search(text) is None:
	                    continue
	                print(f"{path}:{lineno}: {text.strip()}")
	    except Exception as exc:
	        print(f"[ERR] Could not read {path}: {exc}")

	def main():
	    """Scan every entry of ``TARGETS``, resolved against the current directory.

	    A target that is a file is passed straight to ``scan_file``. A target
	    that is a directory is searched recursively, first for ``*.py`` files
	    and then for ``*.md`` files. A target that is neither (typically a path
	    that does not exist) is reported with a ``[WARN]`` line instead of being
	    skipped silently, so a mistyped path is not mistaken for "no matches".
	    """
	    root = Path(".").resolve()
	    for target in TARGETS:
	        candidate = root / target
	        if candidate.is_file():
	            scan_file(candidate)
	        elif candidate.is_dir():
	            # Keep the original ordering: all Python sources, then all docs.
	            for glob in ("*.py", "*.md"):
	                for found in candidate.rglob(glob):
	                    scan_file(found)
	        else:
	            print(f"[WARN] Skipping {candidate}: not a file or directory")


	# Run the audit only when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()