File size: 7,039 Bytes
8755993
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
"""
Path obfuscation module for privacy-preserving codebase indexing.

Implements HMAC-based path component hashing to mask sensitive file paths
while preserving directory structure for retrieval. Inspired by Cursor's
privacy features.
"""

import hashlib
import hmac
import json
import logging
import secrets
from pathlib import Path
from typing import Dict, Optional

logger = logging.getLogger(__name__)


class PathObfuscator:
    """
    Obfuscates file paths using HMAC-based hashing.

    Each path component (directory/file name) is hashed separately with
    HMAC-SHA256 and truncated, preserving the directory structure while
    masking actual names. A dotted final component is hashed as
    ``<hash(name)>.<hash(extension)>`` so files sharing an extension also
    share an obfuscated suffix.

    Hashing is one-way: mapping an obfuscated path back to the original
    relies on the lookup tables kept in memory and persisted to
    ``mapping_file``. That file contains the original paths and the secret
    key in clear text, so it must be protected like the source tree itself.

    Example (hash values are illustrative):
        src/payments/invoice_processor.py -> 3f9a1c2e/7b0d44aa/91c2e5f0.f4
    """

    def __init__(self, secret_key: Optional[str] = None, mapping_file: Optional[str] = None):
        """
        Initialize path obfuscator.

        Args:
            secret_key: Secret key for HMAC. When omitted (or empty), the key
                stored in ``mapping_file`` by a previous run is reused so
                hashes stay stable across processes; a new random key is
                generated only when no stored key exists.
            mapping_file: JSON file holding the path mappings used for
                reverse lookup (defaults to "chroma_db/.path_mapping.json")
        """
        self.mapping_file = mapping_file or "chroma_db/.path_mapping.json"

        # Load existing mappings (and the key they were produced with)
        self.obfuscated_to_original: Dict[str, str] = {}
        self.original_to_obfuscated: Dict[str, str] = {}
        self._persisted_key: Optional[str] = None
        self._load_mappings()

        if secret_key:
            if self._persisted_key and self._persisted_key != secret_key:
                # Cached mappings stay usable for lookup, but new paths will
                # hash differently from the ones already stored.
                logger.warning(
                    "Secret key differs from the one stored in %s; "
                    "new hashes will not match previously stored ones",
                    self.mapping_file,
                )
            self.secret_key = secret_key
        else:
            self.secret_key = self._persisted_key or self._generate_key()

    def _generate_key(self) -> str:
        """Generate a random secret key (64 hex characters)."""
        return secrets.token_hex(32)

    def _hash_component(self, component: str) -> str:
        """
        Hash a single path component using HMAC.

        Args:
            component: Path component (directory or file name)

        Returns:
            First 8 hex characters of the HMAC-SHA256 digest
        """
        digest = hmac.new(
            self.secret_key.encode('utf-8'),
            component.encode('utf-8'),
            hashlib.sha256,
        ).hexdigest()

        # Truncated for readability; this trades collision resistance for
        # shorter paths.
        return digest[:8]

    def _hash_filename(self, filename: str) -> str:
        """Hash a dotted file name as ``<hash(name)>.<2-char hash(extension)>``."""
        name, ext = filename.rsplit('.', 1)
        return f"{self._hash_component(name)}.{self._hash_component(ext)[:2]}"

    def obfuscate_path(self, original_path: str) -> str:
        """
        Obfuscate a file path.

        The result is cached, so repeated calls with the same string return
        the same value without re-hashing. New mappings are written to
        ``mapping_file`` immediately.

        Args:
            original_path: Original file path (e.g., "src/payments/invoice.py")

        Returns:
            Obfuscated path with components joined by "/"
            (e.g., "3f9a1c2e/7b0d44aa/91c2e5f0.f4")
        """
        cached = self.original_to_obfuscated.get(original_path)
        if cached is not None:
            return cached

        components = Path(original_path).parts
        last_index = len(components) - 1

        # Only the final component may be treated as "file with extension".
        # Decide that by position, not by value: a directory that happens to
        # share the file's name must still be hashed as a directory.
        obfuscated_path = '/'.join(
            self._hash_filename(component)
            if index == last_index and '.' in component
            else self._hash_component(component)
            for index, component in enumerate(components)
        )

        previous = self.obfuscated_to_original.get(obfuscated_path)
        if previous is not None and previous != original_path:
            # Truncated hashes (and path normalization) can make two
            # originals share one obfuscated path; the reverse lookup can
            # only remember one of them. Original paths are deliberately not
            # logged at warning level.
            logger.warning(
                "Obfuscated path %s now maps to a different original path; "
                "the earlier reverse mapping is overwritten",
                obfuscated_path,
            )

        # Store mapping
        self.original_to_obfuscated[original_path] = obfuscated_path
        self.obfuscated_to_original[obfuscated_path] = original_path
        self._save_mappings()

        logger.debug("Obfuscated: %s -> %s", original_path, obfuscated_path)
        return obfuscated_path

    def deobfuscate_path(self, obfuscated_path: str) -> Optional[str]:
        """
        Deobfuscate a file path.

        Args:
            obfuscated_path: Obfuscated path

        Returns:
            Original path or None if not found in the mapping table
        """
        return self.obfuscated_to_original.get(obfuscated_path)

    def _load_mappings(self) -> None:
        """
        Load path mappings (and the stored secret key) from disk.

        Best-effort: a missing, unreadable or malformed file is logged and
        leaves the in-memory tables untouched.
        """
        self._persisted_key = None

        try:
            # json.dump writes ASCII-only output by default, so reading as
            # UTF-8 is compatible with files written by earlier versions.
            with open(Path(self.mapping_file), 'r', encoding='utf-8') as f:
                data = json.load(f)
        except FileNotFoundError:
            logger.info("No existing path mappings found at %s", self.mapping_file)
            return
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError
            logger.error("Failed to load path mappings: %s", e)
            return

        if not isinstance(data, dict):
            logger.error(
                "Failed to load path mappings: expected a JSON object in %s",
                self.mapping_file,
            )
            return

        reverse = data.get('obfuscated_to_original', {})
        forward = data.get('original_to_obfuscated', {})
        if not isinstance(reverse, dict) or not isinstance(forward, dict):
            logger.error(
                "Failed to load path mappings: mapping tables in %s are not JSON objects",
                self.mapping_file,
            )
            return

        self.obfuscated_to_original = reverse
        self.original_to_obfuscated = forward

        stored_key = data.get('secret_key')
        if isinstance(stored_key, str) and stored_key:
            self._persisted_key = stored_key

        logger.info("Loaded %d path mappings", len(self.original_to_obfuscated))

    def _save_mappings(self) -> None:
        """
        Save path mappings and the secret key to disk.

        Best-effort: failures (including failure to create the parent
        directory) are logged rather than raised.
        """
        mapping_path = Path(self.mapping_file)
        data = {
            'obfuscated_to_original': self.obfuscated_to_original,
            'original_to_obfuscated': self.original_to_obfuscated,
            'secret_key': self.secret_key,  # Reloaded by _load_mappings for consistency
        }

        try:
            mapping_path.parent.mkdir(parents=True, exist_ok=True)
            with open(mapping_path, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2)
        except (OSError, TypeError, ValueError) as e:
            logger.error("Failed to save path mappings: %s", e)
            return

        logger.debug("Saved %d path mappings", len(self.original_to_obfuscated))

    def clear_mappings(self):
        """Clear all in-memory path mappings and delete the mapping file."""
        self.obfuscated_to_original.clear()
        self.original_to_obfuscated.clear()

        try:
            Path(self.mapping_file).unlink()
        except FileNotFoundError:
            # Nothing persisted yet (or already removed) - nothing to do
            pass

        logger.info("Cleared all path mappings")

    def get_stats(self) -> Dict[str, int]:
        """
        Get statistics about path mappings.

        Returns:
            Dict with 'total_paths' (number of mapped original paths) and
            'unique_directories' (number of distinct parent directories
            among them)
        """
        parent_dirs = {str(Path(p).parent) for p in self.original_to_obfuscated}
        return {
            'total_paths': len(self.original_to_obfuscated),
            'unique_directories': len(parent_dirs),
        }


# Module-level singleton shared by get_obfuscator() and reset_obfuscator().
# It stays None until get_obfuscator() creates the instance, and
# reset_obfuscator() sets it back to None.
_obfuscator: Optional[PathObfuscator] = None


def get_obfuscator(
    secret_key: Optional[str] = None,
    mapping_file: Optional[str] = None
) -> PathObfuscator:
    """
    Get the global path obfuscator instance.

    The instance is created on the first call. Arguments passed on later
    calls do not reconfigure it: the existing instance is returned as-is and
    a warning is logged if the requested settings differ from the active
    ones. Call reset_obfuscator() first to build an instance with new
    settings.

    Args:
        secret_key: Secret key for HMAC (only used when the instance is created)
        mapping_file: File to store path mappings (only used when the
            instance is created)

    Returns:
        PathObfuscator instance
    """
    global _obfuscator

    if _obfuscator is None:
        _obfuscator = PathObfuscator(secret_key, mapping_file)
        return _obfuscator

    # Surface arguments that would otherwise be dropped silently. Only the
    # argument names are logged, never the secret key itself.
    ignored = [
        name
        for name, requested, active in (
            ("secret_key", secret_key, _obfuscator.secret_key),
            ("mapping_file", mapping_file, _obfuscator.mapping_file),
        )
        if requested and requested != active
    ]
    if ignored:
        logger.warning(
            "get_obfuscator() ignoring %s: a global obfuscator already exists; "
            "call reset_obfuscator() first to apply new settings",
            ", ".join(ignored),
        )

    return _obfuscator


def reset_obfuscator():
    """
    Drop the cached global obfuscator instance.

    After this call the next get_obfuscator() invocation constructs a new
    PathObfuscator. Mainly useful for testing.
    """
    # Rebind the module-level name directly in this module's namespace.
    globals()["_obfuscator"] = None