|
|
""" |
|
|
LlamaIndex RAG for Credit Card Benefits Knowledge Base |
|
|
Provides intelligent context for card recommendations |
|
|
""" |
|
|
|
|
|
import os |
|
|
import logging |
|
|
from typing import Optional, Dict, List |
|
|
from pathlib import Path |
|
|
|
|
|
logger = logging.getLogger(__name__)


# LlamaIndex is an optional dependency. The flag stays False unless every
# import in the guarded block succeeds; the rest of the module consults it
# before touching any LlamaIndex name.
#
# The status glyphs in the log messages are written as escapes so the source
# file stays pure ASCII and cannot be corrupted by an encoding round-trip.
LLAMAINDEX_AVAILABLE = False
try:
    from llama_index.core import (
        VectorStoreIndex,
        SimpleDirectoryReader,
        Settings,
        StorageContext,
        load_index_from_storage,
    )
    from llama_index.embeddings.openai import OpenAIEmbedding
    from llama_index.llms.openai import OpenAI

    LLAMAINDEX_AVAILABLE = True
    logger.info("\u2705 LlamaIndex library imported successfully")
except ImportError as e:
    # One of the packages is missing: tell the operator how to install them.
    logger.warning(f"\u26a0\ufe0f LlamaIndex not installed: {e}")
    logger.warning("Install with: pip install llama-index llama-index-embeddings-openai llama-index-llms-openai")
except Exception as e:
    # The package is present but failed while importing; keep the module
    # importable and leave the feature switched off.
    logger.error(f"\u274c Error importing LlamaIndex: {e}")
|
|
|
|
|
|
|
|
class CardBenefitsRAG:
    """RAG system for credit card benefits using LlamaIndex.

    Construction configures the global LlamaIndex ``Settings``, loads a
    persisted index from ``persist_dir`` (or builds one from the ``.md`` /
    ``.txt`` files in ``data_dir``) and creates a query engine from it.
    Every public query method returns ``None`` rather than raising when the
    system is disabled or the query fails.

    Attributes:
        enabled: True only while LlamaIndex is importable, ``OPENAI_API_KEY``
            is set and index / query-engine setup has not failed.
        data_dir: Directory holding the card benefit source files.
        persist_dir: Directory the index is loaded from and persisted to.
        index: The loaded or freshly built index, or None.
        query_engine: Query engine created from ``index``, or None.
    """

    # Longest context string returned by get_card_context before truncation.
    _MAX_CONTEXT_CHARS = 500

    # Contents of the placeholder document written when no source file exists.
    _SAMPLE_CARD_MD = (
        "# Sample Credit Card\n"
        "\n"
        "## Earning Rates\n"
        "- 4x points at restaurants\n"
        "- 4x points at U.S. supermarkets (up to $25,000/year)\n"
        "- 3x points on flights\n"
        "- 1x points on everything else\n"
        "\n"
        "## Annual Fee\n"
        "$250 (offset by $240 in credits)\n"
        "\n"
        "## Best For\n"
        "Dining and grocery spending\n"
        "\n"
        "## Important Notes\n"
        "- Supercenters like Walmart and Target do NOT count as supermarkets\n"
        "- Must activate credits to receive full value\n"
        "- No foreign transaction fees\n"
    )

    def __init__(self, data_dir: str = "data/card_benefits", persist_dir: str = ".index_storage"):
        """
        Initialize LlamaIndex RAG

        Never raises for a missing dependency, a missing API key or a setup
        failure: the problem is logged and ``enabled`` is left False.

        Args:
            data_dir: Directory containing card benefit markdown files
            persist_dir: Directory to persist the index
        """
        # Assigned before any early return so that a disabled instance still
        # carries every attribute instead of raising AttributeError on access.
        self.data_dir = Path(data_dir)
        self.persist_dir = Path(persist_dir)
        self.index = None
        self.query_engine = None

        api_key = os.getenv("OPENAI_API_KEY")
        self.enabled = LLAMAINDEX_AVAILABLE and bool(api_key)

        if not LLAMAINDEX_AVAILABLE:
            logger.warning("\u26a0\ufe0f LlamaIndex library not available")
            logger.warning("To enable RAG features:")
            logger.warning("1. Add 'llama-index' to requirements.txt")
            logger.warning("2. Run: pip install llama-index llama-index-embeddings-openai llama-index-llms-openai")
            return

        if not api_key:
            logger.warning("\u26a0\ufe0f OPENAI_API_KEY not found in environment")
            logger.warning("RAG features will be disabled")
            return

        try:
            # These are process-global LlamaIndex settings, not per-instance.
            Settings.llm = OpenAI(
                model="gpt-4-turbo-preview",
                temperature=0.1,
                api_key=api_key,
            )
            Settings.embed_model = OpenAIEmbedding(
                model="text-embedding-3-small",
                api_key=api_key,
            )
            Settings.chunk_size = 512
            Settings.chunk_overlap = 50

            if self.persist_dir.exists():
                logger.info("Loading existing LlamaIndex from storage...")
                try:
                    storage_context = StorageContext.from_defaults(persist_dir=str(self.persist_dir))
                    self.index = load_index_from_storage(storage_context)
                    logger.info("\u2705 Index loaded from storage")
                except Exception as e:
                    # An unreadable or empty store is not fatal: rebuild.
                    logger.warning(f"\u26a0\ufe0f Could not load index from storage: {e}")
                    logger.info("Creating new index...")
                    self._create_index()
            else:
                logger.info("Creating new LlamaIndex from documents...")
                self._create_index()

            if self.index is None or not self.enabled:
                # _create_index already logged the cause. Stop here instead of
                # failing on `None.as_query_engine` or reporting success for
                # an instance that has been disabled.
                self.enabled = False
                return

            self.query_engine = self.index.as_query_engine(
                similarity_top_k=3,
                response_mode="compact",
            )
            logger.info("\u2705 CardBenefitsRAG initialized successfully")

        except Exception as e:
            # logger.exception attaches the traceback to the log record.
            logger.exception(f"\u274c Failed to initialize CardBenefitsRAG: {e}")
            self.enabled = False

    def _create_index(self):
        """Create index from documents

        Reads the ``.md`` / ``.txt`` files directly inside ``data_dir``
        (writing a sample file first when there are none), builds the vector
        index and persists it to ``persist_dir``. If no documents load, or
        loading / building / persisting raises, the error is logged and
        ``enabled`` is set to False.
        """
        if not self.data_dir.exists():
            logger.warning(f"\u26a0\ufe0f Data directory not found: {self.data_dir}")
            logger.info("Creating data directory with sample file...")
            self.data_dir.mkdir(parents=True, exist_ok=True)
            self._create_sample_data()

        # any() stops at the first match; no need to materialise both listings.
        has_sources = any(self.data_dir.glob("*.md")) or any(self.data_dir.glob("*.txt"))
        if not has_sources:
            logger.warning("\u26a0\ufe0f No markdown or text files found in data directory")
            logger.info("Creating sample file...")
            self._create_sample_data()

        try:
            documents = SimpleDirectoryReader(
                str(self.data_dir),
                required_exts=[".md", ".txt"],
                recursive=False,
            ).load_data()
            logger.info(f"Loaded {len(documents)} documents")

            if not documents:
                logger.error("\u274c No documents loaded. Check data directory.")
                self.enabled = False
                return

            self.index = VectorStoreIndex.from_documents(
                documents,
                show_progress=True,
            )

            self.persist_dir.mkdir(parents=True, exist_ok=True)
            self.index.storage_context.persist(persist_dir=str(self.persist_dir))
            logger.info(f"\U0001f4be Index persisted to {self.persist_dir}")

        except Exception as e:
            logger.exception(f"\u274c Failed to create index: {e}")
            self.enabled = False

    def _create_sample_data(self):
        """Create sample card benefit file if none exist

        Writes ``sample_card.md`` into ``data_dir``, creating the directory
        when it is missing and overwriting any previous sample file.
        """
        sample_file = self.data_dir / "sample_card.md"
        self.data_dir.mkdir(parents=True, exist_ok=True)
        # Explicit encoding: the default depends on the platform locale.
        sample_file.write_text(self._SAMPLE_CARD_MD, encoding="utf-8")
        logger.info(f"Created sample file: {sample_file}")

    def query_benefits(self, card_name: str, question: str) -> Optional[str]:
        """
        Query card benefits

        Args:
            card_name: Name of the card
            question: Question about the card

        Returns:
            Answer from RAG or None (when disabled or when the query fails)
        """
        if not self.enabled:
            logger.warning("RAG query skipped (not enabled)")
            return None

        try:
            query = f"For {card_name}: {question}"
            logger.info(f"RAG Query: {query}")
            return str(self.query_engine.query(query))
        except Exception as e:
            logger.exception(f"\u274c Query failed: {e}")
            return None

    def get_card_context(self, card_name: str, merchant: str, category: str) -> Optional[str]:
        """
        Get relevant context for a card recommendation

        Args:
            card_name: Recommended card
            merchant: Merchant name
            category: Spending category

        Returns:
            Relevant context or None. Answers longer than 500 characters are
            cut to 500 characters followed by "...".
        """
        if not self.enabled:
            return None

        try:
            query = (
                f"For {card_name} when shopping at {merchant} ({category} category):\n"
                f"1. What are the earning rates for {category} purchases?\n"
                f"2. Are there any spending caps or exclusions relevant to {merchant}?\n"
                "3. What are 2-3 key benefits or warnings for this type of purchase?\n"
                "\n"
                "Provide a concise summary in 2-3 sentences."
            )
            logger.info(f"Context Query: {card_name} at {merchant}")
            result = str(self.query_engine.query(query))

            # The prompt asks for 2-3 sentences; cap the answer in case the
            # model ignores that.
            if len(result) > self._MAX_CONTEXT_CHARS:
                result = result[:self._MAX_CONTEXT_CHARS] + "..."
            return result

        except Exception as e:
            logger.exception(f"\u274c Context retrieval failed: {e}")
            return None

    def compare_cards(self, card1: str, card2: str, category: str) -> Optional[str]:
        """
        Compare two cards for a specific category

        Args:
            card1: First card name
            card2: Second card name
            category: Spending category

        Returns:
            Comparison or None (when disabled or when the query fails)
        """
        if not self.enabled:
            return None

        try:
            query = f"Compare {card1} vs {card2} for {category} spending. Which is better and why? Provide a concise answer in 2-3 sentences."
            logger.info(f"Comparison: {card1} vs {card2} for {category}")
            return str(self.query_engine.query(query))
        except Exception as e:
            logger.exception(f"\u274c Comparison failed: {e}")
            return None

    def get_spending_warnings(self, card_name: str, category: str, amount: float) -> Optional[str]:
        """
        Get warnings about spending caps or limitations

        Args:
            card_name: Card name
            category: Spending category
            amount: Transaction amount

        Returns:
            Warnings or None (when disabled, when ``amount`` cannot be
            formatted as a number, or when the query fails)
        """
        if not self.enabled:
            return None

        try:
            # Formatting stays inside the try so a non-numeric amount is
            # reported and turned into None like any other failure.
            query = f"For {card_name} and a ${amount:.2f} purchase in {category} category: Are there any spending caps, annual limits, or exclusions I should know about? Be specific and concise."
            logger.info(f"Warnings: {card_name} ${amount:.2f} in {category}")
            return str(self.query_engine.query(query))
        except Exception as e:
            logger.exception(f"\u274c Warning retrieval failed: {e}")
            return None
|
|
|
|
|
|
|
|
|
|
|
# Module-level cache for the shared CardBenefitsRAG; None until first requested.
_rag_instance = None


def get_card_benefits_rag() -> CardBenefitsRAG:
    """Return the shared CardBenefitsRAG, constructing it on the first call."""
    global _rag_instance
    if _rag_instance is not None:
        return _rag_instance
    _rag_instance = CardBenefitsRAG()
    return _rag_instance
|
|
|
|
|
|
|
|
|
|
|
def initialize_rag() -> "CardBenefitsRAG":
    """Initialize RAG system (call this at app startup)

    Obtains the shared instance from ``get_card_benefits_rag`` and logs
    whether it is enabled.

    Returns:
        The shared CardBenefitsRAG instance, whether or not it is enabled.
    """
    logger.info("Initializing LlamaIndex RAG...")
    rag = get_card_benefits_rag()
    # Status glyphs are written as escapes so the source stays ASCII-only.
    if rag.enabled:
        logger.info("\u2705 RAG initialized and ready")
    else:
        logger.warning("\u26a0\ufe0f RAG not available")
    return rag