Spaces:
Running
Running
devjas1
(SYNC): bring parity backend (utils/ scripts/ models/ tests/) from feat/ui-parity-rebuild; no UI changes
6373c5a
#!/usr/bin/env python3 | |
""" | |
audit.py - quick audit tool for the preprocessing baseline.

Searches for relevant keywords in the ml-polymer-recycling repo
to confirm which preprocessing steps (resample, baseline, smooth,
normalize, etc.) are actually implemented in code/docs.
""" | |
import re | |
from pathlib import Path | |
# ||== KEYWORDS TO TRACE ==||
# Substrings to look for in every scanned line; they are joined into a
# single case-insensitive regex further down in this module.
# Every entry needs its own comma: Python silently concatenates adjacent
# string literals, which would merge two keywords into one that never matches.
KEYWORDS = [
    "resample", "baseline", "smooth", "Savitz",
    "normalize", "minmax", "TARGET_LENGTH", "WINDOW_LENGTH",
    "POLYORDER", "DEGREE", "input_length", "target_len",
    "Figure2CNN", "ResNet",
]
# ||==== DIRECTORIES/FILES TO SCAN ====||
# Paths are resolved against the current working directory by main().
# Entries that are files are scanned directly; entries that are directories
# are searched recursively for *.py and *.md files.
TARGETS = [
    "scripts/preprocess_dataset.py",
    # NOTE(review): previously spelled "run_inferece.py"; assumed to be a typo
    # for the inference script -- confirm against the repository layout.
    "scripts/run_inference.py",
    "models/",
    "utils/",
    "README.md",
    "GROUND_TRUTH_PIPELINE.md",
    "docs/",
]
# ||==== COMPILE REGEX FOR KEYWORDS ====||
# A single alternation ("kw1|kw2|...") built from KEYWORDS, compiled once at
# import time and matched case-insensitively. Keywords are inserted verbatim,
# so any regex metacharacter in a keyword is interpreted as regex syntax.
pattern = re.compile(
    "|".join(KEYWORDS),
    flags=re.IGNORECASE,
)
def scan_file(path: Path, regex=None):
    """Print every line of *path* that matches the keyword regex.

    Each hit is written to stdout as ``<path>:<line number>: <stripped line>``,
    with line numbers starting at 1.

    Args:
        path: File to scan. It is decoded as UTF-8 and undecodable bytes are
            dropped (``errors="ignore"``) instead of aborting the scan.
        regex: Optional compiled pattern to search with. When omitted, the
            module-level ``pattern`` is used.

    Returns:
        None. Read failures are not raised; they are reported as an
        ``[ERR]`` line on stdout.
    """
    # Resolve the pattern outside the try block so that a missing or broken
    # module-level pattern surfaces as a real error instead of being reported
    # as a per-file read failure.
    matcher = pattern if regex is None else regex
    try:
        with path.open(encoding="utf-8", errors="ignore") as handle:
            for lineno, line in enumerate(handle, 1):
                if matcher.search(line):
                    print(f"{path}:{lineno}: {line.strip()}")
    except (OSError, UnicodeError) as e:
        # OSError: the file is missing, unreadable, or is a directory.
        # UnicodeError: the output stream cannot encode a matched line.
        # Anything else is a programming error and is allowed to propagate.
        print(f"[ERR] Could not read {path}: {e}")
def main():
    """Scan every entry in TARGETS, relative to the current working directory.

    A target that is a file is scanned directly. A target that is a directory
    is searched recursively: all ``*.py`` files are scanned first, then all
    ``*.md`` files. A target that is neither is reported with a ``[WARN]``
    line on stdout instead of being skipped silently, so a misspelled or
    moved path is visible in the audit output.
    """
    root = Path(".").resolve()
    for target in TARGETS:
        p = root / target
        if p.is_file():
            scan_file(p)
        elif p.is_dir():
            for glob in ("*.py", "*.md"):
                # rglob yields entries in arbitrary order; sorting makes the
                # audit output reproducible between runs and machines.
                for sub in sorted(p.rglob(glob)):
                    scan_file(sub)
        else:
            print(f"[WARN] Target not found, skipped: {p}")
# Run the audit only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()