sammy786 committed on
Commit
1c67453
·
verified ·
1 Parent(s): 58819e6

Create docs/mcp_architecture.md

Browse files
Files changed (1) hide show
  1. docs/mcp_architecture.md +602 -0
docs/mcp_architecture.md ADDED
@@ -0,0 +1,602 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ```markdown
2
+ # MCP Server Implementation Guide
3
+
4
+ ## Overview
5
+
6
+ RewardPilot implements a multi-agent MCP (Model Context Protocol) architecture with four independent microservices (an Orchestrator plus the Smart Wallet, Rewards RAG, and Spend Forecast servers) that work together to provide intelligent credit card recommendations.
7
+
8
+ ## Architecture Diagram
9
+
10
+ ```
11
+ ┌─────────────────────────────────────────────────────────────────┐
12
+ │ User Interface │
13
+ │ (Gradio 6.0 App) │
14
+ └────────────────────────────┬────────────────────────────────────┘
15
+
16
+
17
+ ┌─────────────────────────────────────────────────────────────────┐
18
+ │ Orchestrator Agent │
19
+ │ (Claude 3.5 Sonnet) │
20
+ │ ┌──────────────────────────────────────────────────────────┐ │
21
+ │ │ Phase 1: Planning │ │
22
+ │ │ - Analyze transaction context │ │
23
+ │ │ - Determine required MCP servers │ │
24
+ │ │ - Create execution strategy │ │
25
+ │ └──────────────────────────────────────────────────────────┘ │
26
+ └────────────────────────────┬────────────────────────────────────┘
27
+
28
+ ┌────────────┼────────────┐
29
+ ▼ ▼ ▼
30
+ ┌───────────────┐ ┌──────────┐ ┌────────────┐
31
+ │ Smart Wallet │ │ RAG │ │ Forecast │
32
+ │ MCP Server │ │ MCP │ │ MCP Server │
33
+ └───────┬───────┘ └────┬─────┘ └─────┬──────┘
34
+ │ │ │
35
+ ▼ ▼ ▼
36
+ ┌──────────────────────────────────────────┐
37
+ │ Gemini 2.0 Flash │
38
+ │ (Reasoning & Synthesis) │
39
+ └──────────────┬───────────────────────────┘
40
+
41
+
42
+ ┌───────────────┐
43
+ │ Final Response│
44
+ └───────────────┘
45
+ ```
46
+
47
+ ---
48
+
49
+ ## MCP Server 1: Orchestrator
50
+
51
+ ### Purpose
52
+ Coordinates all MCP servers and manages the agent workflow.
53
+
54
+ ### Deployment
55
+ - **URL:** https://mcp-1st-birthday-rewardpilot-orchestrator.hf.space
56
+ - **Stack:** FastAPI + Claude 3.5 Sonnet
57
+ - **Hosting:** Hugging Face Spaces
58
+
59
+ ### API Endpoints
60
+
61
+ #### POST `/recommend`
62
+ Get a card recommendation for a transaction.
63
+
64
+ **Request:**
65
+ ```json
66
+ {
67
+ "user_id": "u_alice",
68
+ "merchant": "Whole Foods",
69
+ "mcc": "5411",
70
+ "amount_usd": 127.50,
71
+ "category": "Groceries"
72
+ }
73
+ ```
74
+
75
+ **Response:**
76
+ ```json
77
+ {
78
+ "recommended_card": {
79
+ "card_id": "c_amex_gold",
80
+ "card_name": "American Express Gold",
81
+ "issuer": "American Express"
82
+ },
83
+ "rewards": {
84
+ "points_earned": 510,
85
+ "cash_value": 5.10,
86
+ "earn_rate": "4x points"
87
+ },
88
+ "reasoning": "Amex Gold offers 4x points on U.S. supermarkets...",
89
+ "confidence": 0.95,
90
+ "alternatives": [
91
+ {
92
+ "card_name": "Citi Custom Cash",
93
+ "rewards": 3.82,
94
+ "reason": "5% but monthly cap already hit"
95
+ }
96
+ ],
97
+ "warnings": [
98
+ "You're at $450/$1500 monthly cap. 3 more grocery trips available."
99
+ ]
100
+ }
101
+ ```
102
+
103
+ ### Implementation
104
+
105
+ ```python
106
+ # orchestrator_server.py
107
+ from fastapi import FastAPI, HTTPException
108
+ from anthropic import Anthropic
109
+ import httpx
110
+ import asyncio
111
+
112
+ app = FastAPI(title="RewardPilot Orchestrator")
113
+ anthropic = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
114
+
115
+ @app.post("/recommend")
116
+ async def recommend_card(request: TransactionRequest):
117
+ # Phase 1: Planning with Claude
118
+ plan = await create_execution_plan(request)
119
+
120
+ # Phase 2: Parallel MCP calls
121
+ mcp_results = await execute_mcp_calls(plan)
122
+
123
+ # Phase 3: Reasoning with Gemini
124
+ explanation = await synthesize_reasoning(request, mcp_results)
125
+
126
+ # Phase 4: Format response
127
+ return format_recommendation(mcp_results, explanation)
128
+
129
+ async def create_execution_plan(request: TransactionRequest):
130
+ """Claude analyzes transaction and plans MCP calls"""
131
+ prompt = f"""
132
+ Analyze this transaction and determine which MCP servers to call:
133
+
134
+ Transaction:
135
+ - Merchant: {request.merchant}
136
+ - Category: {request.category}
137
+ - Amount: ${request.amount_usd}
138
+
139
+ Available MCP servers:
140
+ 1. smart_wallet - Card recommendations and reward calculations
141
+ 2. rewards_rag - Semantic search of card benefits
142
+ 3. spend_forecast - Spending predictions and cap warnings
143
+
144
+ Return a JSON plan with:
145
+ - strategy: optimization approach
146
+ - mcp_calls: list of servers to call (priority order)
147
+ - confidence_threshold: minimum confidence for recommendation
148
+ """
149
+
150
+ response = anthropic.messages.create(
151
+ model="claude-3-5-sonnet-20241022",
152
+ max_tokens=1024,
153
+ messages=[{"role": "user", "content": prompt}]
154
+ )
155
+
156
+ return json.loads(response.content[0].text)
157
+
158
+ async def execute_mcp_calls(plan: dict):
159
+ """Call MCP servers in parallel"""
160
+ tasks = []
161
+
162
+ for mcp_call in plan["mcp_calls"]:
163
+ if mcp_call["service"] == "smart_wallet":
164
+ tasks.append(call_smart_wallet(request))
165
+ elif mcp_call["service"] == "rewards_rag":
166
+ tasks.append(call_rewards_rag(request))
167
+ elif mcp_call["service"] == "spend_forecast":
168
+ tasks.append(call_forecast(request))
169
+
170
+ results = await asyncio.gather(*tasks)
171
+ return dict(zip([c["service"] for c in plan["mcp_calls"]], results))
172
+ ```
173
+
174
+ ---
175
+
176
+ ## MCP Server 2: Smart Wallet
177
+
178
+ ### Purpose
179
+ Analyzes the user's credit cards and calculates optimal rewards.
180
+
181
+ ### Deployment
182
+ - **URL:** https://mcp-1st-birthday-rewardpilot-smart-wallet.hf.space
183
+ - **Stack:** FastAPI + Python + PostgreSQL
184
+ - **Hosting:** Hugging Face Spaces
185
+
186
+ ### API Endpoints
187
+
188
+ #### POST `/analyze`
189
+ Analyze a transaction against the user's wallet.
190
+
191
+ **Request:**
192
+ ```json
193
+ {
194
+ "user_id": "u_alice",
195
+ "merchant": "Whole Foods",
196
+ "mcc": "5411",
197
+ "amount_usd": 127.50
198
+ }
199
+ ```
200
+
201
+ **Response:**
202
+ ```json
203
+ {
204
+ "recommended_card": {
205
+ "card_id": "c_amex_gold",
206
+ "card_name": "American Express Gold",
207
+ "rewards_earned": 5.10,
208
+ "earn_rate": "4x points",
209
+ "points_earned": 510
210
+ },
211
+ "all_cards_comparison": [
212
+ {
213
+ "card_name": "Amex Gold",
214
+ "rewards": 5.10,
215
+ "rank": 1
216
+ },
217
+ {
218
+ "card_name": "Citi Custom Cash",
219
+ "rewards": 3.82,
220
+ "rank": 2,
221
+ "note": "Cap already hit this month"
222
+ }
223
+ ]
224
+ }
225
+ ```
226
+
227
+ ### Implementation
228
+
229
+ ```python
230
+ # smart_wallet_server.py
231
+ from fastapi import FastAPI
232
+ from sqlalchemy import create_engine
233
+ from typing import List
234
+
235
+ app = FastAPI(title="Smart Wallet MCP")
236
+
237
+ class CardAnalyzer:
238
+ def __init__(self, user_id: str):
239
+ self.user_id = user_id
240
+ self.cards = self.load_user_cards()
241
+
242
+ def analyze_transaction(self, merchant: str, mcc: str, amount: float):
243
+ """Calculate rewards for all cards"""
244
+ results = []
245
+
246
+ for card in self.cards:
247
+ # Get reward rate for this MCC
248
+ reward_rate = self.get_reward_rate(card, mcc)
249
+
250
+ # Check spending caps
251
+ current_spending = self.get_monthly_spending(card, mcc)
252
+ cap_remaining = card.monthly_cap - current_spending
253
+
254
+ # Calculate rewards
255
+ if cap_remaining >= amount:
256
+ rewards = amount * reward_rate
257
+ else:
258
+ # Partial cap scenario
259
+ rewards = (cap_remaining * reward_rate) +
260
+ ((amount - cap_remaining) * card.base_rate)
261
+
262
+ results.append({
263
+ "card": card,
264
+ "rewards": rewards,
265
+ "effective_rate": rewards / amount,
266
+ "cap_status": {
267
+ "current": current_spending,
268
+ "limit": card.monthly_cap,
269
+ "remaining": cap_remaining
270
+ }
271
+ })
272
+
273
+ # Sort by rewards (descending)
274
+ results.sort(key=lambda x: x["rewards"], reverse=True)
275
+
276
+ return results[0] # Return best card
277
+ ```
278
+
279
+ ---
280
+
281
+ ## MCP Server 3: Rewards RAG
282
+
283
+ ### Purpose
284
+ Semantic search across credit card benefit documents.
285
+
286
+ ### Deployment
287
+ - **URL:** https://mcp-1st-birthday-rewardpilot-rewards-rag.hf.space
288
+ - **Stack:** FastAPI + LlamaIndex + ChromaDB
289
+ - **Hosting:** Hugging Face Spaces
290
+
291
+ ### API Endpoints
292
+
293
+ #### POST `/query`
294
+ Search card benefits with natural language.
295
+
296
+ **Request:**
297
+ ```json
298
+ {
299
+ "query": "Does Amex Gold work at Costco for groceries?",
300
+ "card_name": "American Express Gold",
301
+ "top_k": 3
302
+ }
303
+ ```
304
+
305
+ **Response:**
306
+ ```json
307
+ {
308
+ "answer": "No, American Express cards are not accepted at Costco warehouse locations due to Costco's exclusive Visa agreement. However, Amex Gold works at Costco.com for online orders.",
309
+ "sources": [
310
+ {
311
+ "card_name": "American Express Gold",
312
+ "content": "Merchant acceptance: Not accepted at Costco warehouses...",
313
+ "relevance_score": 0.92
314
+ }
315
+ ]
316
+ }
317
+ ```
318
+
319
+ ### Implementation
320
+ See `docs/llamaindex_setup.md` for detailed RAG implementation.
321
+
322
+ ---
323
+
324
+ ## MCP Server 4: Spend Forecast
325
+
326
+ ### Purpose
327
+ ML-based spending predictions and cap warnings.
328
+
329
+ ### Deployment
330
+ - **URL:** https://mcp-1st-birthday-rewardpilot-spend-forecast.hf.space
331
+ - **Stack:** FastAPI + Scikit-learn + Redis
332
+ - **Hosting:** Hugging Face Spaces
333
+
334
+ ### API Endpoints
335
+
336
+ #### POST `/predict`
337
+ Predict spending over the next `horizon_days` days.
338
+
339
+ **Request:**
340
+ ```json
341
+ {
342
+ "user_id": "u_alice",
343
+ "card_id": "c_amex_gold",
344
+ "category": "Groceries",
345
+ "horizon_days": 30
346
+ }
347
+ ```
348
+
349
+ **Response:**
350
+ ```json
351
+ {
352
+ "predicted_spending": 520.50,
353
+ "confidence_interval": [480.00, 560.00],
354
+ "warnings": [
355
+ {
356
+ "type": "cap_warning",
357
+ "message": "Likely to exceed $500 monthly cap",
358
+ "probability": 0.78,
359
+ "suggested_action": "Switch to Citi Custom Cash after $500"
360
+ }
361
+ ]
362
+ }
363
+ ```
364
+
365
+ ### Implementation
366
+
367
+ ```python
368
+ # forecast_server.py
369
+ from fastapi import FastAPI
370
+ from sklearn.ensemble import RandomForestRegressor
371
+ import numpy as np
372
+
373
+ app = FastAPI(title="Spend Forecast MCP")
374
+
375
+ class SpendingForecaster:
376
+ def __init__(self):
377
+ self.model = RandomForestRegressor(n_estimators=100)
378
+
379
+ def predict(self, user_id: str, category: str, horizon_days: int):
380
+ """Predict spending for next N days"""
381
+ # Load historical data
382
+ history = self.load_user_history(user_id, category)
383
+
384
+ # Feature engineering
385
+ features = self.extract_features(history)
386
+
387
+ # Predict
388
+ prediction = self.model.predict(features)
389
+
390
+ # Calculate confidence interval
391
+ predictions = [tree.predict(features) for tree in self.model.estimators_]
392
+ lower = np.percentile(predictions, 5)
393
+ upper = np.percentile(predictions, 95)
394
+
395
+ return {
396
+ "predicted_spending": float(prediction[0]),
397
+ "confidence_interval": [float(lower), float(upper)]
398
+ }
399
+ ```
400
+
401
+ ---
402
+
403
+ ## Communication Flow
404
+
405
+ ### Sequence Diagram
406
+
407
+ ```
408
+ User -> Gradio: Enter transaction
409
+ Gradio -> Orchestrator: POST /recommend
410
+ Orchestrator -> Claude: Create execution plan
411
+ Claude -> Orchestrator: {plan: call all 3 MCPs}
412
+
413
+ Orchestrator -> Smart Wallet: POST /analyze
414
+ Orchestrator -> RAG: POST /query
415
+ Orchestrator -> Forecast: POST /predict
416
+
417
+ Smart Wallet -> Orchestrator: {best_card: Amex Gold, rewards: 5.10}
418
+ RAG -> Orchestrator: {benefits: "4x on groceries..."}
419
+ Forecast -> Orchestrator: {warning: "Near cap"}
420
+
421
+ Orchestrator -> Gemini: Synthesize results
422
+ Gemini -> Orchestrator: {explanation: "Use Amex Gold because..."}
423
+
424
+ Orchestrator -> Gradio: Final recommendation
425
+ Gradio -> User: Display result
426
+ ```
427
+
428
+ ---
429
+
430
+ ## Deployment Instructions
431
+
432
+ ### 1. Deploy Each MCP Server to Hugging Face
433
+
434
+ ```bash
435
+ # Clone template
436
+ git clone https://huggingface.co/spaces/YOUR_USERNAME/rewardpilot-orchestrator
437
+
438
+ # Add files
439
+ cp orchestrator_server.py app.py
440
+ cp requirements.txt .
441
+
442
+ # Create Space on HF
443
+ huggingface-cli repo create rewardpilot-orchestrator --type space --space_sdk gradio
444
+
445
+ # Push
446
+ git add .
447
+ git commit -m "Deploy orchestrator"
448
+ git push
449
+ ```
450
+
451
+ ### 2. Set Environment Variables
452
+
453
+ In each Space's settings, add:
454
+ ```bash
455
+ ANTHROPIC_API_KEY=sk-ant-xxxxx
456
+ GEMINI_API_KEY=AIzaSyxxxxx
457
+ OPENAI_API_KEY=sk-xxxxx
458
+ ```
459
+
460
+ ### 3. Configure Endpoints
461
+
462
+ In the main `app.py`:
463
+ ```python
464
+ MCP_ENDPOINTS = {
465
+ "orchestrator": "https://mcp-1st-birthday-rewardpilot-orchestrator.hf.space",
466
+ "smart_wallet": "https://mcp-1st-birthday-rewardpilot-smart-wallet.hf.space",
467
+ "rewards_rag": "https://mcp-1st-birthday-rewardpilot-rewards-rag.hf.space",
468
+ "forecast": "https://mcp-1st-birthday-rewardpilot-spend-forecast.hf.space"
469
+ }
470
+ ```
471
+
472
+ ---
473
+
474
+ ## Error Handling
475
+
476
+ ### Graceful Degradation
477
+
478
+ ```python
479
+ async def call_mcp_with_fallback(service_name: str, request_data: dict):
480
+ """Call MCP server with timeout and fallback"""
481
+ try:
482
+ async with httpx.AsyncClient(timeout=10.0) as client:
483
+ response = await client.post(
484
+ MCP_ENDPOINTS[service_name],
485
+ json=request_data
486
+ )
487
+ response.raise_for_status()
488
+ return response.json()
489
+ except httpx.TimeoutException:
490
+ logger.error(f"{service_name} timeout")
491
+ return get_fallback_response(service_name)
492
+ except httpx.HTTPError as e:
493
+ logger.error(f"{service_name} error: {e}")
494
+ return get_fallback_response(service_name)
495
+ ```
496
+
497
+ ---
498
+
499
+ ## Monitoring
500
+
501
+ ### Health Checks
502
+
503
+ ```python
504
+ @app.get("/health")
505
+ async def health_check():
506
+ """Check status of all MCP servers"""
507
+ statuses = {}
508
+
509
+ for service, url in MCP_ENDPOINTS.items():
510
+ try:
511
+ async with httpx.AsyncClient(timeout=5.0) as client:
512
+ response = await client.get(f"{url}/health")
513
+ statuses[service] = {
514
+ "status": "healthy" if response.status_code == 200 else "unhealthy",
515
+ "latency_ms": response.elapsed.total_seconds() * 1000
516
+ }
517
+ except Exception as e:
518
+ statuses[service] = {"status": "down", "error": str(e)}
519
+
520
+ return statuses
521
+ ```
522
+
523
+ ---
524
+
525
+ ## Performance Optimization
526
+
527
+ ### Caching Strategy
528
+
529
+ ```python
530
+ from functools import lru_cache
531
+ import redis
532
+
533
+ # Redis cache for frequent queries
534
+ redis_client = redis.Redis(host='localhost', port=6379, decode_responses=True)
535
+
536
+ @lru_cache(maxsize=1000)
537
+ def get_card_benefits(card_name: str):
538
+ """Cache card benefits for 1 hour"""
539
+ cache_key = f"benefits:{card_name}"
540
+
541
+ # Check cache
542
+ cached = redis_client.get(cache_key)
543
+ if cached:
544
+ return json.loads(cached)
545
+
546
+ # Fetch from RAG
547
+ result = call_rewards_rag({"query": f"Get all benefits for {card_name}"})
548
+
549
+ # Cache for 1 hour
550
+ redis_client.setex(cache_key, 3600, json.dumps(result))
551
+
552
+ return result
553
+ ```
554
+
555
+ ---
556
+
557
+ ## Testing
558
+
559
+ ### Integration Tests
560
+
561
+ ```python
562
+ import pytest
563
+ import httpx
564
+
565
+ @pytest.mark.asyncio
566
+ async def test_orchestrator_end_to_end():
567
+ """Test full recommendation flow"""
568
+ async with httpx.AsyncClient() as client:
569
+ response = await client.post(
570
+ f"{MCP_ENDPOINTS['orchestrator']}/recommend",
571
+ json={
572
+ "user_id": "test_user",
573
+ "merchant": "Whole Foods",
574
+ "amount_usd": 100.00
575
+ }
576
+ )
577
+
578
+ assert response.status_code == 200
579
+ data = response.json()
580
+ assert "recommended_card" in data
581
+ assert "rewards" in data
582
+ assert "reasoning" in data
583
+ ```
584
+
585
+ ---
586
+
587
+ ## Next Steps
588
+
589
+ 1. **Scale MCP servers** - Add load balancing
590
+ 2. **Add authentication** - JWT tokens for API access
591
+ 3. **Implement webhooks** - Real-time transaction notifications
592
+ 4. **Add more MCP servers** - Travel optimization, business expenses, etc.
593
+
594
+ ---
595
+
596
+ **Related Documentation:**
597
+ - [Modal Deployment Guide](./modal_deployment.md)
598
+ - [LlamaIndex RAG Setup](./llamaindex_setup.md)
599
+ - [Agent Reasoning Flow](./agent_reasoning.md)
600
+ ```
601
+
602
+ ---