Spaces:
Running
Running
File size: 7,039 Bytes
8755993 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 |
"""
Path obfuscation module for privacy-preserving codebase indexing.
Implements HMAC-based path component hashing to mask sensitive file paths
while preserving directory structure for retrieval. Inspired by Cursor's
privacy features.
"""
import hashlib
import hmac
import json
import logging
import secrets
from pathlib import Path
from typing import Dict, Optional
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class PathObfuscator:
    """
    Obfuscates file paths using HMAC-based hashing.

    Each path component (directory/file name) is hashed separately with
    HMAC-SHA256 and truncated to 8 hex characters, preserving the directory
    structure while masking actual names. For the final component, the text
    after the last dot is hashed on its own and truncated to 2 hex characters
    so files of the same type stay recognisable as such.

    Example (digests are illustrative only):
        src/payments/invoice_processor.py -> 3fa91c2e/7b20d4aa/91c0e5f2.4d

    Both mapping directions and the secret key are persisted as JSON in
    ``mapping_file`` so paths can be deobfuscated in a later session.
    """

    # Number of hex characters kept from each HMAC digest.
    _NAME_HASH_LEN = 8
    _EXT_HASH_LEN = 2
    # Digest lengths tried, in order, when two originals truncate to the
    # same obfuscated path.
    _COLLISION_HASH_LENS = (16, 32, 64)

    def __init__(self, secret_key: Optional[str] = None, mapping_file: Optional[str] = None):
        """
        Initialize path obfuscator.

        Args:
            secret_key: Secret key for HMAC. When omitted (or empty), the key
                stored in an existing mapping file is reused; if there is
                none, a random key is generated.
            mapping_file: File to store path mappings for deobfuscation
                (defaults to "chroma_db/.path_mapping.json").
        """
        key_supplied = bool(secret_key)
        self.secret_key = secret_key or self._generate_key()
        self.mapping_file = mapping_file or "chroma_db/.path_mapping.json"
        self.obfuscated_to_original: Dict[str, str] = {}
        self.original_to_obfuscated: Dict[str, str] = {}
        # Without an explicit key, reuse the persisted one so hashes stay
        # stable from one session to the next.
        self._load_mappings(adopt_stored_key=not key_supplied)

    def _generate_key(self) -> str:
        """Generate a random secret key (64 hex characters)."""
        return secrets.token_hex(32)

    def _hash_component(self, component: str, length: int = _NAME_HASH_LEN) -> str:
        """
        Hash a single path component using HMAC-SHA256.

        Args:
            component: Path component (directory or file name)
            length: Number of leading hex digest characters to keep

        Returns:
            Hashed component (shortened for readability)
        """
        digest = hmac.new(
            self.secret_key.encode(),
            component.encode(),
            hashlib.sha256,
        ).hexdigest()
        return digest[:length]

    def _obfuscate_components(self, components: tuple, final_name_hash: Optional[str] = None) -> str:
        """
        Hash every component and join the results with '/'.

        Args:
            components: Path parts as produced by ``Path(...).parts``
            final_name_hash: Replacement hash for the name part of the last
                component (used to disambiguate collisions)

        Returns:
            The obfuscated path
        """
        last_index = len(components) - 1
        hashed = []
        for index, component in enumerate(components):
            # Decide "is this the file name?" by position, not by value, so a
            # directory that happens to share the file's name stays a directory.
            if index != last_index:
                hashed.append(self._hash_component(component))
                continue
            name, dot, ext = component.rpartition('.')
            if dot and name:
                name_hash = final_name_hash or self._hash_component(name)
                ext_hash = self._hash_component(ext, self._EXT_HASH_LEN)
                hashed.append(f"{name_hash}.{ext_hash}")
            else:
                # No extension, or a dotfile such as ".env": hash it whole so
                # dotfiles do not all share the hash of an empty name.
                hashed.append(final_name_hash or self._hash_component(component))
        return '/'.join(hashed)

    def _resolve_collision(self, original_path: str, components: tuple, colliding_path: str) -> str:
        """
        Find an unused obfuscated path for an original whose default
        obfuscation is already taken by a different original.

        The name part of the last component is replaced by a longer digest of
        the whole original path. Returns ``colliding_path`` unchanged if no
        free candidate is found.
        """
        for length in self._COLLISION_HASH_LENS:
            candidate = self._obfuscate_components(
                components, self._hash_component(original_path, length)
            )
            owner = self.obfuscated_to_original.get(candidate)
            if owner is None or owner == original_path:
                logger.warning(
                    "Obfuscated path collision on %s; using %s instead",
                    colliding_path, candidate,
                )
                return candidate
        logger.warning(
            "Unresolved obfuscated path collision on %s; previous reverse mapping is replaced",
            colliding_path,
        )
        return colliding_path

    def obfuscate_path(self, original_path: str) -> str:
        """
        Obfuscate a file path.

        The result is cached and persisted, so repeated calls with the same
        path return the same value.

        Args:
            original_path: Original file path (e.g., "src/payments/invoice.py")

        Returns:
            Obfuscated path (e.g., "3fa91c2e/7b20d4aa/91c0e5f2.4d")
        """
        cached = self.original_to_obfuscated.get(original_path)
        if cached is not None:
            return cached

        components = Path(original_path).parts
        obfuscated_path = self._obfuscate_components(components)

        # Truncated digests can collide; never let a second original silently
        # take over an existing reverse mapping.
        owner = self.obfuscated_to_original.get(obfuscated_path)
        if owner is not None and owner != original_path:
            obfuscated_path = self._resolve_collision(original_path, components, obfuscated_path)

        self.original_to_obfuscated[original_path] = obfuscated_path
        self.obfuscated_to_original[obfuscated_path] = original_path
        self._save_mappings()
        logger.debug("Obfuscated: %s -> %s", original_path, obfuscated_path)
        return obfuscated_path

    def deobfuscate_path(self, obfuscated_path: str) -> Optional[str]:
        """
        Deobfuscate a file path.

        Args:
            obfuscated_path: Obfuscated path

        Returns:
            Original path or None if not found
        """
        return self.obfuscated_to_original.get(obfuscated_path)

    def _load_mappings(self, adopt_stored_key: bool = False):
        """
        Load path mappings from disk (best effort; failures are logged).

        Args:
            adopt_stored_key: When True, replace ``self.secret_key`` with the
                key stored in the mapping file, if one is present.
        """
        mapping_path = Path(self.mapping_file)
        if not mapping_path.exists():
            logger.info("No existing path mappings found at %s", self.mapping_file)
            return

        try:
            with open(mapping_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError) as e:  # ValueError covers malformed JSON
            logger.error("Failed to load path mappings: %s", e)
            return

        if not isinstance(data, dict):
            logger.error("Failed to load path mappings: %s", "top-level JSON value is not an object")
            return

        obfuscated_to_original = data.get('obfuscated_to_original', {})
        original_to_obfuscated = data.get('original_to_obfuscated', {})
        if isinstance(obfuscated_to_original, dict):
            self.obfuscated_to_original = obfuscated_to_original
        if isinstance(original_to_obfuscated, dict):
            self.original_to_obfuscated = original_to_obfuscated

        stored_key = data.get('secret_key')
        if isinstance(stored_key, str) and stored_key:
            if adopt_stored_key:
                self.secret_key = stored_key
            elif stored_key != self.secret_key:
                logger.warning(
                    "Secret key differs from the one stored in %s; "
                    "previously stored mappings were produced with a different key",
                    self.mapping_file,
                )

        logger.info("Loaded %d path mappings", len(self.original_to_obfuscated))

    def _save_mappings(self):
        """Save path mappings and the secret key to disk (best effort)."""
        mapping_path = Path(self.mapping_file)
        data = {
            'obfuscated_to_original': self.obfuscated_to_original,
            'original_to_obfuscated': self.original_to_obfuscated,
            'secret_key': self.secret_key  # Store for consistency
        }
        try:
            mapping_path.parent.mkdir(parents=True, exist_ok=True)
            # The file holds the secret key and the real paths: create it
            # owner-only before anything sensitive is written.
            mapping_path.touch(mode=0o600, exist_ok=True)
            with open(mapping_path, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2)
        except (OSError, TypeError, ValueError) as e:
            logger.error("Failed to save path mappings: %s", e)
            return

        try:
            # Also tighten files created before permissions were restricted.
            mapping_path.chmod(0o600)
        except OSError as e:
            logger.debug("Could not restrict permissions on %s: %s", self.mapping_file, e)

        logger.debug("Saved %d path mappings", len(self.original_to_obfuscated))

    def clear_mappings(self):
        """Clear all in-memory path mappings and delete the mapping file."""
        self.obfuscated_to_original.clear()
        self.original_to_obfuscated.clear()
        try:
            Path(self.mapping_file).unlink()
        except FileNotFoundError:
            pass  # nothing persisted yet
        logger.info("Cleared all path mappings")

    def get_stats(self) -> Dict[str, int]:
        """
        Get statistics about path mappings.

        Returns:
            Dict with 'total_paths' (number of mapped originals) and
            'unique_directories' (number of distinct parent directories).
        """
        parent_dirs = {str(Path(p).parent) for p in self.original_to_obfuscated}
        return {
            'total_paths': len(self.original_to_obfuscated),
            'unique_directories': len(parent_dirs),
        }
# Global obfuscator instance: None until get_obfuscator() creates it,
# and set back to None by reset_obfuscator().
_obfuscator: Optional[PathObfuscator] = None
def get_obfuscator(
    secret_key: Optional[str] = None,
    mapping_file: Optional[str] = None
) -> PathObfuscator:
    """
    Get the global path obfuscator instance, creating it on first use.

    The arguments only take effect on the call that creates the instance.
    If an instance already exists and a different value is passed, the
    existing instance is returned unchanged and a warning is logged.

    Args:
        secret_key: Secret key for HMAC (auto-generated if not provided)
        mapping_file: File to store path mappings

    Returns:
        PathObfuscator instance
    """
    global _obfuscator
    if _obfuscator is None:
        _obfuscator = PathObfuscator(secret_key, mapping_file)
    elif (
        (secret_key and secret_key != _obfuscator.secret_key)
        or (mapping_file and mapping_file != _obfuscator.mapping_file)
    ):
        # Surface the mismatch instead of silently dropping the caller's settings.
        logger.warning(
            "get_obfuscator() arguments ignored: the global obfuscator is "
            "already initialized; call reset_obfuscator() first to reconfigure"
        )
    return _obfuscator
def reset_obfuscator():
    """
    Drop the shared obfuscator so the next get_obfuscator() call builds a new one.

    Only the module-level reference is cleared. Handy for isolating tests.
    """
    global _obfuscator
    _obfuscator = None
|