Spaces:
Sleeping
Sleeping
from pathlib import Path | |
from shutil import copy2, rmtree | |
from fastcore.utils import L | |
import re | |
from src.fabrics_processor.config import config | |
def sentence2snake(name: str) -> str:
    """Lower-case *name* and turn it into a snake_case-style identifier.

    Every non-word character (regex ``\\W``) becomes an underscore, and any
    run of consecutive underscores is then collapsed into a single one.
    Leading or trailing underscores are kept.
    """
    mask_non_word = re.compile(r'\W').sub
    collapse_runs = re.compile(r'_+').sub
    return collapse_runs(r'_', mask_non_word(r'_', name.lower()))
def round_timestamp(ts: float) -> int:
    """Coarsen a timestamp to absorb filesystem differences.

    The fractional part is discarded and the last four digits of the whole
    seconds are dropped, i.e. the value is bucketed into 10,000-second steps
    (truncating toward zero).

    Args:
        ts: Timestamp in seconds (e.g. ``st_mtime``).

    Returns:
        The whole-second part of *ts* divided by 10,000, truncated toward zero.
    """
    # Integer arithmetic instead of slicing ``str(ts)``: slicing breaks for
    # values below 10,000 (empty string) and for floats whose repr uses
    # scientific notation (e.g. ``1e+16``).
    whole_seconds = int(ts)
    bucket = abs(whole_seconds) // 10_000
    return -bucket if whole_seconds < 0 else bucket
def get_md_files_obsidian(path: Path) -> dict:
    """Collect all markdown files below an obsidian vault.

    Each file is keyed by its snake_case stem, a ``-`` and the lower-cased
    name of its parent folder; the suffix is the identifier that distinguishes
    own prompts from others. If two files produce the same key, the one
    visited last wins.

    Args:
        path: Root directory that is searched recursively for ``*.md`` files.

    Returns:
        Mapping of key -> (path, modification timestamp, size in bytes).
    """
    files = {}
    for md_path in Path(path).glob('**/*.md'):
        # A single stat() call so timestamp and size describe the same file state.
        info = md_path.stat()
        key = sentence2snake(md_path.stem) + "-" + md_path.parent.name.lower()
        files[key] = (md_path, info.st_mtime, info.st_size)
    return files
def get_md_files_fabricsfolder(path: Path) -> dict:
    """Collect the ``system.md`` file of every immediate subdirectory of *path*.

    Subdirectories without a ``system.md`` and plain files directly inside
    *path* are ignored.

    Args:
        path: Target directory whose subdirectories each hold one prompt.

    Returns:
        Mapping of directory name -> (system.md path, modification timestamp,
        size in bytes).
    """
    files = {}
    for subdir in Path(path).iterdir():
        if not subdir.is_dir():
            continue
        system_md = subdir / 'system.md'
        if not system_md.exists():
            continue
        # A single stat() call so timestamp and size describe the same file state.
        info = system_md.stat()
        # Key by the full directory name: ``.stem`` would cut everything after
        # the last dot (e.g. "foo-v1.0" -> "foo-v1") and the key would no
        # longer match the directory on disk.
        files[subdir.name] = (system_md, info.st_mtime, info.st_size)
    return files
def get_modified_files(source_files: dict, target_files: dict) -> list:
    """Find source entries whose counterpart in the target has a different size.

    Only keys present in both mappings are considered. The result is a list
    of single-entry dictionaries, each mapping the filename to its source
    tuple: path, timestamp, size.
    """
    # Timestamps are deliberately not compared: file system differences or
    # daylight savings would cause false positives, while files edited at
    # almost the same time still need to be picked up. Size is the only
    # criterion.
    changed = []
    for name, entry in source_files.items():
        if name not in target_files:
            continue
        if entry[2] == target_files[name][2]:
            continue
        changed.append({name: entry})
    return L(changed)
def get_new_files(source_files: dict, target_files: dict) -> list:
    """Find source entries that have no counterpart in the target.

    The result is a list of single-entry dictionaries, each mapping the
    filename to its source tuple: path, timestamp, size.
    """
    missing = []
    for name, entry in source_files.items():
        if name in target_files:
            continue
        missing.append({name: entry})
    return L(missing)
def process_file(source: dict, target_dir: Path) -> None:
    """
    Process a single file: create directory, copy as system.md, create user.md

    Args:
        source: Dict of source file: filename:(path, timestamp, size).
            Only the first entry is used.
        target_dir: Base target directory (e.g. 'md_target'); may be given
            as a Path or as a plain string. It must already exist.
    """
    filename, (filepath, *_) = next(iter(source.items()))
    # Coerce to Path so a plain string target does not break the `/` joins.
    subdir = Path(target_dir) / filename
    subdir.mkdir(mode=0o755, exist_ok=True)
    # copy2 also carries over the file metadata (e.g. modification time).
    copy2(filepath, subdir / 'system.md')
    (subdir / 'user.md').touch()
def sync_folders(source_dir: Path, target_dir: Path) -> None:
    """
    Main function to synchronize folders

    New and size-changed source files are copied into the target; target
    directories that no longer have a source file are removed.

    Args:
        source_dir: Path to source directory (obsidian vault)
        target_dir: Path to target directory (fabrics folder)
    """
    # Normalise once so plain strings work for every later path operation.
    source_root = Path(source_dir)
    target_root = Path(target_dir)

    source_files = get_md_files_obsidian(source_root)
    target_files = get_md_files_fabricsfolder(target_root)

    # Get all files that need processing
    files_to_process = L(get_new_files(source_files, target_files) +
                         get_modified_files(source_files, target_files))

    # Process each file
    for entry in files_to_process:
        process_file(entry, target_root)

    # Get all directories that need deleting. Only names containing "-" are
    # candidates: keys generated from the vault always contain that separator,
    # so directories without it (presumably prompts from elsewhere) are kept.
    dirs_to_delete = [name for name in target_files
                      if name not in source_files and "-" in name]

    # Delete each directory and its contents
    for dir_name in dirs_to_delete:
        rmtree(target_root / dir_name)