Instructions to use FuryAssassin/CheckpointCleanup-Release with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use FuryAssassin/CheckpointCleanup-Release with Transformers:
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("FuryAssassin/CheckpointCleanup-Release", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
# Checkpoint cleanup script.
#
# Scans ``checkpoints/step_<N>`` directories, groups their ``config.json``
# files by normalized content, keeps the config only in the highest-numbered
# step of each group, and replaces the other copies with a
# ``config_pointer.txt`` that names the retained file. It then records a
# short note in ``README.md``. ``pytorch_model.bin`` files are never touched.
import datetime
import hashlib
import json
import os
import re
import sys
from pathlib import Path

# Directory names that count as checkpoint steps, e.g. ``step_100``.
_STEP_DIR_RE = re.compile(r'step_\d+')

# README heading after which the cleanup note is inserted.
_README_SECTION = '## 4. How to Run Locally'

_README_NOTE = '\n\n> Note: Checkpoints cleanup — duplicate config.json files across checkpoints have been consolidated. For each unique config, only the highest-numbered step retains the canonical config; other steps now contain a pointer file. No pytorch_model.bin files were removed.\n'


def _step_number(step_dir):
    """Return the integer N of a ``step_<N>`` directory."""
    return int(step_dir.name.split('_')[1])


def _find_step_dirs(root):
    """Return the ``step_<N>`` sub-directories of *root*, sorted by N."""
    found = [
        entry for entry in root.iterdir()
        if entry.is_dir() and _STEP_DIR_RE.fullmatch(entry.name)
    ]
    found.sort(key=_step_number)
    return found


def _normalized_config_text(cfg):
    """Return a canonical text form of the config file at *cfg*.

    Valid JSON is re-serialized with sorted keys and compact separators so
    that key order and whitespace do not affect the comparison. Text that is
    not valid JSON falls back to its non-empty lines, each stripped.
    """
    raw = cfg.read_text(encoding='utf-8')
    try:
        return json.dumps(json.loads(raw), sort_keys=True, separators=(',', ':'))
    except ValueError:
        # Not parseable as JSON: compare on whitespace-normalized text.
        return '\n'.join(
            line.strip() for line in raw.splitlines() if line.strip()
        )


def _group_configs(step_dirs):
    """Group step directories by the MD5 of their normalized config.

    Returns a dict mapping each digest to
    ``{'norm': <normalized text>, 'steps': [(step_number, directory), ...]}``.
    Directories without a ``config.json`` file are reported and skipped.
    """
    groups = {}
    for step_dir in step_dirs:
        cfg = step_dir / 'config.json'
        if not cfg.is_file():
            print(f'No config in {step_dir}')
            continue
        norm = _normalized_config_text(cfg)
        # MD5 is only a content fingerprint for de-duplication here.
        digest = hashlib.md5(norm.encode()).hexdigest()
        group = groups.setdefault(digest, {'norm': norm, 'steps': []})
        group['steps'].append((_step_number(step_dir), step_dir))
    return groups


def _consolidate_group(digest, steps, timestamp):
    """Keep the config of the highest step in *steps*; point the rest at it."""
    max_step, max_dir = max(steps, key=lambda item: item[0])
    print(f'Canonical for hash {digest} is step_{max_step}')
    for step, step_dir in steps:
        cfg = step_dir / 'config.json'
        if step == max_step:
            print(f'Keeping canonical config in {step_dir}')
            continue
        # Remove config.json but DO NOT delete pytorch_model.bin.
        try:
            cfg.unlink()
        except OSError as exc:
            # The config is still in place, so a pointer claiming it was
            # consolidated would be wrong; report and move on.
            print('Failed to remove', cfg, exc)
            continue
        print(f'Removed {cfg}')
        pointer = step_dir / 'config_pointer.txt'
        rel = os.path.relpath(max_dir / 'config.json', step_dir)
        content = f"This config was consolidated during repository cleanup on {timestamp}.\nCanonical config retained at: {rel}\nOriginal step: step_{step}\nCanonical step: step_{max_step}\nMD5: {digest}\n"
        pointer.write_text(content, encoding='utf-8')
        print(f'Wrote pointer {pointer}')


def _update_readme(readme):
    """Insert the cleanup note into *readme*, at most once."""
    if not readme.exists():
        print('README.md not found')
        return
    text = readme.read_text(encoding='utf-8')
    if _README_NOTE.strip() in text:
        # A previous run already recorded the note; do not duplicate it.
        print('README.md already contains cleanup note')
        return
    if _README_SECTION in text:
        before, after = text.split(_README_SECTION, 1)
        readme.write_text(
            before + _README_SECTION + _README_NOTE + after, encoding='utf-8'
        )
        print('Updated README.md with cleanup note')
    else:
        print('Could not find section to insert note; appending at end')
        # The note already starts with a blank line, so add no extra one.
        readme.write_text(text.rstrip('\n') + _README_NOTE, encoding='utf-8')


def main(root='checkpoints', readme='README.md'):
    """Run the cleanup and return a process exit code.

    Args:
        root: Directory holding the ``step_<N>`` checkpoint directories.
        readme: README file that receives the cleanup note.

    Returns:
        0 on success, 1 when *root* does not exist.
    """
    root = Path(root)
    if not root.exists():
        print('checkpoints directory not found')
        return 1

    step_dirs = _find_step_dirs(root)
    print(f'Found {len(step_dirs)} step dirs')

    configs = _group_configs(step_dirs)
    print('Groups:')
    for digest, group in configs.items():
        print(digest, '->', sorted(step for step, _ in group['steps']))

    # Aware "now" avoids the deprecated utcnow(); dropping tzinfo before
    # formatting keeps the naive ISO string plus 'Z' suffix.
    utc_now = datetime.datetime.now(datetime.timezone.utc)
    timestamp = utc_now.replace(tzinfo=None).isoformat() + 'Z'

    # For each group, keep config in highest step, replace others with pointer.
    for digest, group in configs.items():
        _consolidate_group(digest, group['steps'], timestamp)

    _update_readme(Path(readme))
    print('Done')
    return 0


if __name__ == '__main__':
    sys.exit(main())