devjas1
(SYNC): bring parity backend (utils/ scripts/ models/ tests/) from feat/ui-parity-rebuild; no UI changes
6373c5a
#!/usr/bin/env python3
"""
audit.py - quick audit tool for preprocessing baseline
Searches for relevant keywords in the ml-polymer-recycling repo
to confirm what preprocessing steps (resample, baseline, smooth,
normalize, etc.) are actually implemented in code/docs.
"""
import re
from pathlib import Path
# ||== KEYWORDS TO TRACE ==||
# Terms whose presence marks a preprocessing step or a related constant /
# model name. They are joined into a single case-insensitive regex below.
KEYWORDS = [
    "resample", "baseline", "smooth", "Savitz",
    # The comma after "minmax" was missing: implicit string concatenation
    # turned it into the single keyword "minmaxTARGET_LENGTH", so neither
    # "minmax" nor "TARGET_LENGTH" was ever searched for on its own.
    "normalize", "minmax", "TARGET_LENGTH", "WINDOW_LENGTH",
    "POLYORDER", "DEGREE", "input_length", "target_len",
    "Figure2CNN", "ResNet",
]
# ||==== DIRECTORIES/FILES TO SCAN ====||
# Paths are resolved by main() against the current working directory.
# A file entry is scanned directly; a directory entry is walked
# recursively for *.py and *.md files.
TARGETS = [
    "scripts/preprocess_dataset.py",
    # NOTE(review): this entry was spelled "run_inferece.py", which main()
    # would skip without any message if no such file exists. Corrected to
    # the presumed real name - confirm against the scripts/ directory.
    "scripts/run_inference.py",
    "models/",
    "utils/",
    "README.md",
    "GROUND_TRUTH_PIPELINE.md",
    "docs/",
]
# ||==== COMPILE REGEX FOR KEYWORDS ====||
# One case-insensitive alternation over every keyword. Each keyword is
# passed through re.escape so it is matched as literal text even if a
# future entry contains a regex metacharacter such as "." or "+".
pattern = re.compile("|".join(map(re.escape, KEYWORDS)), re.IGNORECASE)
def scan_file(path: Path):
    """Print every line of *path* that matches the module-level ``pattern``.

    Each hit is written to stdout as ``<path>:<line number>: <stripped line>``
    with line numbers starting at 1. The file is read as UTF-8 and bytes
    that cannot be decoded are dropped rather than raising.

    Any exception raised while opening or reading the file is reported as
    an ``[ERR]`` line on stdout instead of propagating, so one unreadable
    file does not stop the rest of the scan.
    """
    try:
        with path.open(encoding="utf-8", errors="ignore") as handle:
            for i, line in enumerate(handle, start=1):
                # Guard clause: skip lines without any keyword.
                if pattern.search(line) is None:
                    continue
                print(f"{path}:{i}: {line.strip()}")
    except Exception as e:
        print(f"[ERR] Could not read {path}: {e}")
def main():
    """Scan every entry of ``TARGETS``, resolved against the current directory.

    A target that is a file is scanned directly. A target that is a
    directory is walked recursively: every ``*.py`` file beneath it is
    scanned first, then every ``*.md`` file. A target that is neither is
    reported with a ``[WARN]`` line instead of being skipped silently, so
    a misspelled or moved path cannot be mistaken for "no matches found".
    """
    root = Path(".").resolve()
    for target in TARGETS:
        p = root / target
        if p.is_file():
            scan_file(p)
        elif p.is_dir():
            for glob_pattern in ("*.py", "*.md"):
                # Sorted so repeated runs list hits in a stable order.
                for sub in sorted(p.rglob(glob_pattern)):
                    scan_file(sub)
        else:
            print(f"[WARN] Skipping {p}: not a file or directory")


if __name__ == "__main__":
    main()