File size: 10,930 Bytes
05f8865
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
"""
LlamaIndex RAG for Credit Card Benefits Knowledge Base
Provides intelligent context for card recommendations
"""

import os
import logging
from typing import Optional, Dict, List
from pathlib import Path

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Optional-dependency guard: LLAMAINDEX_AVAILABLE starts False and is flipped
# to True only after every LlamaIndex import below has succeeded. The rest of
# the module reads this flag instead of importing LlamaIndex directly.
LLAMAINDEX_AVAILABLE = False
try:
    from llama_index.core import (
        VectorStoreIndex, 
        SimpleDirectoryReader, 
        Settings,
        StorageContext,
        load_index_from_storage
    )
    from llama_index.embeddings.openai import OpenAIEmbedding
    from llama_index.llms.openai import OpenAI
    LLAMAINDEX_AVAILABLE = True
    logger.info("βœ… LlamaIndex library imported successfully")
except ImportError as e:
    # A missing package is expected in some deployments: warn and leave the
    # flag False so the module still imports.
    logger.warning(f"⚠️ LlamaIndex not installed: {e}")
    logger.warning("Install with: pip install llama-index llama-index-embeddings-openai llama-index-llms-openai")
except Exception as e:
    # Any other failure raised while importing is logged as an error; the
    # module import itself still succeeds.
    logger.error(f"❌ Error importing LlamaIndex: {e}")


class CardBenefitsRAG:
    """RAG system for credit card benefits using LlamaIndex.

    Builds (or reloads from ``persist_dir``) a vector index over the
    markdown/text files in ``data_dir`` and answers card-benefit questions
    through a LlamaIndex query engine.

    The instance degrades gracefully: when LlamaIndex could not be imported,
    ``OPENAI_API_KEY`` is unset, or initialization fails, ``enabled`` is
    ``False`` and every public query method returns ``None``.

    Attributes (always defined, even on a disabled instance):
        enabled: Whether queries can be served.
        data_dir: ``Path`` of the directory holding card benefit files.
        persist_dir: ``Path`` of the directory the index is persisted to.
        index: The loaded/created index, or ``None``.
        query_engine: The query engine built from ``index``, or ``None``.

    NOTE(review): several log-message prefixes look like mis-encoded emoji.
    They are runtime strings and are intentionally left untouched here.
    """

    def __init__(self, data_dir: str = "data/card_benefits", persist_dir: str = ".index_storage"):
        """
        Initialize LlamaIndex RAG

        Never raises for a missing dependency, a missing API key, or an
        index build failure; those cases leave ``enabled`` set to ``False``.

        Args:
            data_dir: Directory containing card benefit markdown files
            persist_dir: Directory to persist the index
        """
        # Read the key once so the enabled flag and the clients agree on it.
        api_key = os.getenv("OPENAI_API_KEY")
        self.enabled = LLAMAINDEX_AVAILABLE and bool(api_key)

        # Define every attribute up front so a disabled instance has the same
        # shape as an enabled one (no AttributeError on inspection).
        self.data_dir = Path(data_dir)
        self.persist_dir = Path(persist_dir)
        self.index = None
        self.query_engine = None

        if not LLAMAINDEX_AVAILABLE:
            logger.warning("⚠️ LlamaIndex library not available")
            logger.warning("To enable RAG features:")
            logger.warning("1. Add 'llama-index' to requirements.txt")
            logger.warning("2. Run: pip install llama-index llama-index-embeddings-openai llama-index-llms-openai")
            return

        if not api_key:
            logger.warning("⚠️ OPENAI_API_KEY not found in environment")
            logger.warning("RAG features will be disabled")
            return

        try:
            # Configure LlamaIndex settings
            Settings.llm = OpenAI(
                model="gpt-4-turbo-preview",
                temperature=0.1,
                api_key=api_key,
            )
            Settings.embed_model = OpenAIEmbedding(
                model="text-embedding-3-small",
                api_key=api_key,
            )
            Settings.chunk_size = 512
            Settings.chunk_overlap = 50

            self._load_or_create_index()

            # _create_index reports its own failures by clearing `enabled`
            # (and it has already logged the cause). Stop here instead of
            # dereferencing a missing index or logging a false success.
            if not self.enabled or self.index is None:
                self.enabled = False
                return

            # Create query engine
            self.query_engine = self.index.as_query_engine(
                similarity_top_k=3,
                response_mode="compact"
            )

            logger.info("βœ… CardBenefitsRAG initialized successfully")

        except Exception as e:
            # logger.exception keeps the traceback inside the logging system
            # rather than writing it straight to stderr.
            logger.exception("❌ Failed to initialize CardBenefitsRAG: %s", e)
            self.enabled = False

    def _load_or_create_index(self):
        """Load the persisted index if present, otherwise build a new one.

        A persisted index that cannot be loaded is not fatal: the failure is
        logged as a warning and the index is rebuilt from the documents.
        """
        if not self.persist_dir.exists():
            logger.info("πŸ“š Creating new LlamaIndex from documents...")
            self._create_index()
            return

        logger.info("πŸ“š Loading existing LlamaIndex from storage...")
        try:
            storage_context = StorageContext.from_defaults(persist_dir=str(self.persist_dir))
            self.index = load_index_from_storage(storage_context)
            logger.info("βœ… Index loaded from storage")
        except Exception as e:
            logger.warning(f"⚠️ Could not load index from storage: {e}")
            logger.info("Creating new index...")
            self._create_index()

    def _create_index(self):
        """Create the index from the files in ``data_dir`` and persist it.

        Creates ``data_dir`` and/or a sample file when nothing is available
        to index. On any load/build/persist failure the error is logged and
        ``enabled`` is set to ``False``; no exception escapes from that part.
        """
        if not self.data_dir.exists():
            logger.warning(f"⚠️ Data directory not found: {self.data_dir}")
            logger.info("Creating data directory with sample file...")
            self.data_dir.mkdir(parents=True, exist_ok=True)
            self._create_sample_data()

        # Check if directory has any files
        files = list(self.data_dir.glob("*.md")) + list(self.data_dir.glob("*.txt"))
        if not files:
            logger.warning("⚠️ No markdown or text files found in data directory")
            logger.info("Creating sample file...")
            self._create_sample_data()

        # Load documents
        try:
            documents = SimpleDirectoryReader(
                str(self.data_dir),
                required_exts=[".md", ".txt"],
                recursive=False
            ).load_data()

            logger.info(f"πŸ“„ Loaded {len(documents)} documents")

            if not documents:
                logger.error("❌ No documents loaded. Check data directory.")
                self.enabled = False
                return

            # Create index
            self.index = VectorStoreIndex.from_documents(
                documents,
                show_progress=True
            )

            # Persist index
            self.persist_dir.mkdir(parents=True, exist_ok=True)
            self.index.storage_context.persist(persist_dir=str(self.persist_dir))
            logger.info(f"πŸ’Ύ Index persisted to {self.persist_dir}")

        except Exception as e:
            logger.exception("❌ Failed to create index: %s", e)
            self.enabled = False

    def _create_sample_data(self):
        """Create sample card benefit file if none exist"""
        sample_file = self.data_dir / "sample_card.md"
        sample_content = """# Sample Credit Card

## Earning Rates
- 4x points at restaurants
- 4x points at U.S. supermarkets (up to $25,000/year)
- 3x points on flights
- 1x points on everything else

## Annual Fee
$250 (offset by $240 in credits)

## Best For
Dining and grocery spending

## Important Notes
- Supercenters like Walmart and Target do NOT count as supermarkets
- Must activate credits to receive full value
- No foreign transaction fees
"""
        # Explicit encoding keeps the file identical across platforms.
        sample_file.write_text(sample_content, encoding="utf-8")
        logger.info(f"πŸ“ Created sample file: {sample_file}")

    def query_benefits(self, card_name: str, question: str) -> Optional[str]:
        """
        Query card benefits

        Args:
            card_name: Name of the card
            question: Question about the card

        Returns:
            Answer from RAG, or None when RAG is disabled or the query fails
        """
        if not self.enabled:
            logger.warning("RAG query skipped (not enabled)")
            return None

        try:
            query = f"For {card_name}: {question}"
            logger.info(f"πŸ” RAG Query: {query}")
            response = self.query_engine.query(query)
            return str(response)
        except Exception as e:
            logger.exception("❌ Query failed: %s", e)
            return None

    def get_card_context(self, card_name: str, merchant: str, category: str) -> Optional[str]:
        """
        Get relevant context for a card recommendation

        Args:
            card_name: Recommended card
            merchant: Merchant name
            category: Spending category

        Returns:
            Relevant context (truncated to 500 characters plus "..." when
            longer), or None when RAG is disabled or the query fails
        """
        if not self.enabled:
            return None

        try:
            query = f"""For {card_name} when shopping at {merchant} ({category} category):
1. What are the earning rates for {category} purchases?
2. Are there any spending caps or exclusions relevant to {merchant}?
3. What are 2-3 key benefits or warnings for this type of purchase?

Provide a concise summary in 2-3 sentences."""

            logger.info(f"πŸ” Context Query: {card_name} at {merchant}")
            response = self.query_engine.query(query)
            result = str(response)

            # Cap the length of the returned context
            if len(result) > 500:
                result = result[:500] + "..."

            return result

        except Exception as e:
            logger.error(f"❌ Context retrieval failed: {e}")
            return None

    def compare_cards(self, card1: str, card2: str, category: str) -> Optional[str]:
        """
        Compare two cards for a specific category

        Args:
            card1: First card name
            card2: Second card name
            category: Spending category

        Returns:
            Comparison, or None when RAG is disabled or the query fails
        """
        if not self.enabled:
            return None

        try:
            query = f"Compare {card1} vs {card2} for {category} spending. Which is better and why? Provide a concise answer in 2-3 sentences."
            logger.info(f"πŸ” Comparison: {card1} vs {card2} for {category}")
            response = self.query_engine.query(query)
            return str(response)
        except Exception as e:
            logger.error(f"❌ Comparison failed: {e}")
            return None

    def get_spending_warnings(self, card_name: str, category: str, amount: float) -> Optional[str]:
        """
        Get warnings about spending caps or limitations

        Args:
            card_name: Card name
            category: Spending category
            amount: Transaction amount

        Returns:
            Warnings, or None when RAG is disabled or the query fails
        """
        if not self.enabled:
            return None

        try:
            # Formatting stays inside the try: a non-numeric amount then
            # yields None rather than an exception, as callers expect.
            query = f"For {card_name} and a ${amount:.2f} purchase in {category} category: Are there any spending caps, annual limits, or exclusions I should know about? Be specific and concise."
            logger.info(f"πŸ” Warnings: {card_name} ${amount} in {category}")
            response = self.query_engine.query(query)
            return str(response)
        except Exception as e:
            logger.error(f"❌ Warning retrieval failed: {e}")
            return None


# Process-wide singleton; stays None until the first accessor call.
_rag_instance = None

def get_card_benefits_rag() -> CardBenefitsRAG:
    """Return the shared CardBenefitsRAG, constructing it on first use.

    The instance is created with the class's default arguments and cached in
    the module-level ``_rag_instance`` so later calls return the same object.
    """
    global _rag_instance
    if _rag_instance is not None:
        return _rag_instance
    _rag_instance = CardBenefitsRAG()
    return _rag_instance


# Explicit startup hook. Nothing is initialized at import time; the shared
# instance is only built when this function (or the accessor) is called.
def initialize_rag():
    """Build the shared RAG instance and log whether it is usable.

    Intended to be called at app startup.

    Returns:
        The shared CardBenefitsRAG instance, whether or not it is enabled.
    """
    logger.info("πŸš€ Initializing LlamaIndex RAG...")
    instance = get_card_benefits_rag()
    if not instance.enabled:
        logger.warning("⚠️ RAG not available")
        return instance
    logger.info("βœ… RAG initialized and ready")
    return instance