Spaces:
Running
Running
| """ | |
| Path obfuscation module for privacy-preserving codebase indexing. | |
| Implements HMAC-based path component hashing to mask sensitive file paths | |
| while preserving directory structure for retrieval. Inspired by Cursor's | |
| privacy features. | |
| """ | |
| import hashlib | |
| import hmac | |
| import json | |
| import logging | |
| import secrets | |
| from pathlib import Path | |
| from typing import Dict, Optional | |
logger = logging.getLogger(__name__)


class PathObfuscator:
    """
    Obfuscates file paths using HMAC-based hashing.

    Each path component (directory/file name) is hashed separately with
    HMAC-SHA256 and truncated, preserving the directory structure while
    masking actual names. For the last component, the text after the final
    dot is hashed separately so files sharing an extension share a suffix.

    Example (illustrative digests; real values depend on the secret key):
        src/payments/invoice_processor.py -> 3fa9c1d2/7be04a11/c09d5e6f.a4

    Both directions of the mapping, plus the secret key, are persisted as
    plain JSON in ``mapping_file``. Anyone who can read that file can
    recover the original paths, so it must be protected like the paths
    themselves.
    """

    # Number of hex characters kept from each HMAC digest.
    _NAME_HASH_LEN = 8
    _EXT_HASH_LEN = 2

    def __init__(self, secret_key: Optional[str] = None, mapping_file: Optional[str] = None):
        """
        Initialize path obfuscator.

        Args:
            secret_key: Secret key for HMAC. When omitted (or empty), the key
                stored in ``mapping_file`` is reused if there is one;
                otherwise a fresh random key is generated.
            mapping_file: JSON file that stores the path mappings (defaults
                to "chroma_db/.path_mapping.json").
        """
        self.mapping_file = mapping_file or "chroma_db/.path_mapping.json"
        self.obfuscated_to_original: Dict[str, str] = {}
        self.original_to_obfuscated: Dict[str, str] = {}

        # An empty string means "no key chosen yet"; _load_mappings() may
        # fill it in from disk so hashes stay stable across restarts.
        self.secret_key: str = secret_key or ""
        self._load_mappings()
        if not self.secret_key:
            self.secret_key = self._generate_key()

    def _generate_key(self) -> str:
        """Generate a random secret key (64 hex characters)."""
        return secrets.token_hex(32)

    def _hash_component(self, component: str) -> str:
        """
        Hash a single path component using HMAC-SHA256.

        Args:
            component: Path component (directory or file name)

        Returns:
            The first 8 hex characters of the digest. The truncation keeps
            obfuscated paths short at the cost of a small collision risk.
        """
        digest = hmac.new(
            self.secret_key.encode(),
            component.encode(),
            hashlib.sha256,
        ).hexdigest()
        return digest[:self._NAME_HASH_LEN]

    def obfuscate_path(self, original_path: str) -> str:
        """
        Obfuscate a file path and remember the mapping.

        Args:
            original_path: Original file path (e.g., "src/payments/invoice.py")

        Returns:
            Obfuscated path with components joined by "/". A previously
            seen path returns its stored value without re-hashing.
        """
        if original_path in self.original_to_obfuscated:
            return self.original_to_obfuscated[original_path]

        components = Path(original_path).parts
        last_index = len(components) - 1

        obfuscated_components = []
        for index, component in enumerate(components):
            # Only the final component can be a file name. Compare by
            # position, not by value, so a directory that happens to share
            # the file's name (e.g. "a.b/a.b") is still hashed as a directory.
            if index == last_index and '.' in component:
                name, ext = component.rsplit('.', 1)
                hashed_name = self._hash_component(name)
                # Short, stable suffix so files of one type stay recognisable.
                hashed_ext = self._hash_component(ext)[:self._EXT_HASH_LEN]
                obfuscated_components.append(f"{hashed_name}.{hashed_ext}")
            else:
                obfuscated_components.append(self._hash_component(component))

        obfuscated_path = '/'.join(obfuscated_components)

        # Truncated digests (and differently spelled but equivalent paths)
        # can map two originals to one obfuscated path; the newest wins in
        # the reverse mapping, so make that visible instead of silent.
        previous = self.obfuscated_to_original.get(obfuscated_path)
        if previous is not None and previous != original_path:
            logger.warning(
                "Obfuscated path %s already maps to %s; now remapped to %s",
                obfuscated_path, previous, original_path,
            )

        self.original_to_obfuscated[original_path] = obfuscated_path
        self.obfuscated_to_original[obfuscated_path] = original_path
        self._save_mappings()
        logger.debug(f"Obfuscated: {original_path} -> {obfuscated_path}")
        return obfuscated_path

    def deobfuscate_path(self, obfuscated_path: str) -> Optional[str]:
        """
        Deobfuscate a file path.

        Args:
            obfuscated_path: Obfuscated path

        Returns:
            Original path or None if not found
        """
        return self.obfuscated_to_original.get(obfuscated_path)

    def _load_mappings(self):
        """
        Load path mappings (and the stored secret key) from disk.

        Failures are logged and leave the in-memory state untouched. The
        stored key is adopted only when no key has been set on the instance.
        """
        mapping_path = Path(self.mapping_file)
        if not mapping_path.exists():
            logger.info(f"No existing path mappings found at {self.mapping_file}")
            return

        try:
            with open(mapping_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            logger.error(f"Failed to load path mappings: {e}")
            return

        if not isinstance(data, dict):
            logger.error("Failed to load path mappings: top-level JSON value is not an object")
            return
        reverse = data.get('obfuscated_to_original', {})
        forward = data.get('original_to_obfuscated', {})
        if not isinstance(reverse, dict) or not isinstance(forward, dict):
            logger.error("Failed to load path mappings: mapping sections are not objects")
            return

        self.obfuscated_to_original = reverse
        self.original_to_obfuscated = forward

        stored_key = data.get('secret_key')
        if isinstance(stored_key, str) and stored_key:
            if not self.secret_key:
                # Reuse the persisted key so new hashes line up with the
                # mappings that were just loaded.
                self.secret_key = stored_key
            elif stored_key != self.secret_key:
                logger.warning(
                    "Secret key differs from the one stored in %s; "
                    "previously stored mappings were created with the stored key",
                    self.mapping_file,
                )

        logger.info(f"Loaded {len(self.original_to_obfuscated)} path mappings")

    def _save_mappings(self):
        """
        Save path mappings and the secret key to disk.

        Best effort: any failure (including creating the parent directory)
        is logged rather than raised.
        """
        mapping_path = Path(self.mapping_file)
        data = {
            'obfuscated_to_original': self.obfuscated_to_original,
            'original_to_obfuscated': self.original_to_obfuscated,
            'secret_key': self.secret_key  # Reloaded by _load_mappings()
        }
        try:
            mapping_path.parent.mkdir(parents=True, exist_ok=True)
            with open(mapping_path, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2)
        except (OSError, TypeError, ValueError) as e:
            logger.error(f"Failed to save path mappings: {e}")
            return
        logger.debug(f"Saved {len(self.original_to_obfuscated)} path mappings")

    def clear_mappings(self):
        """
        Clear all path mappings, in memory and on disk.

        The mapping file is deleted, which also removes the stored secret
        key; the key held by this instance is kept.
        """
        self.obfuscated_to_original.clear()
        self.original_to_obfuscated.clear()
        try:
            Path(self.mapping_file).unlink()
        except FileNotFoundError:
            # Nothing was persisted (or it is already gone).
            pass
        logger.info("Cleared all path mappings")

    def get_stats(self) -> Dict[str, int]:
        """
        Get statistics about path mappings.

        Returns:
            Dict with 'total_paths' (number of mapped original paths) and
            'unique_directories' (number of distinct parent directories).
        """
        parents = {str(Path(p).parent) for p in self.original_to_obfuscated}
        return {
            'total_paths': len(self.original_to_obfuscated),
            'unique_directories': len(parents),
        }
# Global obfuscator instance (created lazily by get_obfuscator)
_obfuscator: Optional[PathObfuscator] = None


def get_obfuscator(
    secret_key: Optional[str] = None,
    mapping_file: Optional[str] = None
) -> PathObfuscator:
    """
    Get the global path obfuscator instance.

    The instance is created on the first call; the arguments are only used
    at that point. On later calls they are ignored, and a warning is logged
    if they conflict with the existing instance.

    Args:
        secret_key: Secret key for HMAC, passed to PathObfuscator
        mapping_file: File to store path mappings, passed to PathObfuscator

    Returns:
        PathObfuscator instance
    """
    global _obfuscator
    if _obfuscator is None:
        _obfuscator = PathObfuscator(secret_key, mapping_file)
    else:
        key_conflict = bool(secret_key) and secret_key != _obfuscator.secret_key
        file_conflict = bool(mapping_file) and mapping_file != _obfuscator.mapping_file
        if key_conflict or file_conflict:
            # Never log the key itself; only report that a mismatch exists.
            logger.warning(
                "get_obfuscator() arguments differ from the existing global "
                "instance and are ignored; call reset_obfuscator() first to apply them"
            )
    return _obfuscator
def reset_obfuscator():
    """
    Drop the global obfuscator instance (useful for testing).

    Only the module-level reference is cleared; nothing is removed from disk.
    """
    global _obfuscator
    _obfuscator = None