sammy786 committed on
Commit
05f8865
·
verified ·
1 Parent(s): f409465

Create utils/llamaindex_rag.py

Browse files
Files changed (1) hide show
  1. utils/llamaindex_rag.py +309 -0
utils/llamaindex_rag.py ADDED
@@ -0,0 +1,309 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ LlamaIndex RAG for Credit Card Benefits Knowledge Base
3
+ Provides intelligent context for card recommendations
4
+ """
5
+
6
+ import os
7
+ import logging
8
+ from typing import Optional, Dict, List
9
+ from pathlib import Path
10
+
11
logger = logging.getLogger(__name__)

# LlamaIndex is an optional dependency. The flag below records whether its
# imports succeeded so the rest of the module can degrade to "RAG disabled"
# instead of failing at import time.
LLAMAINDEX_AVAILABLE = False
try:
    from llama_index.core import (
        VectorStoreIndex,
        SimpleDirectoryReader,
        Settings,
        StorageContext,
        load_index_from_storage,
    )
    from llama_index.embeddings.openai import OpenAIEmbedding
    from llama_index.llms.openai import OpenAI
except ImportError as e:
    logger.warning(f"⚠️ LlamaIndex not installed: {e}")
    logger.warning("Install with: pip install llama-index llama-index-embeddings-openai llama-index-llms-openai")
except Exception as e:
    # Anything other than a missing package is unexpected; keep the traceback.
    logger.error(f"❌ Error importing LlamaIndex: {e}", exc_info=True)
else:
    # Only reached when every import above succeeded.
    LLAMAINDEX_AVAILABLE = True
    logger.info("✅ LlamaIndex library imported successfully")
32
+
33
+
34
class CardBenefitsRAG:
    """RAG system for credit card benefits using LlamaIndex.

    An instance is always safe to construct and to query: when LlamaIndex is
    missing, ``OPENAI_API_KEY`` is unset, or index construction fails, the
    instance reports ``enabled = False`` and every query method returns
    ``None``.

    Attributes:
        enabled: True only when queries can actually be served.
        data_dir: Directory scanned for ``.md`` / ``.txt`` benefit documents.
        persist_dir: Directory the vector index is saved to and loaded from.
        index: The loaded or built index, or None when unavailable.
        query_engine: Query engine derived from ``index``, or None.
    """

    # Markdown written into ``data_dir`` when it holds no benefit documents.
    _SAMPLE_CARD_MD = (
        "# Sample Credit Card\n"
        "\n"
        "## Earning Rates\n"
        "- 4x points at restaurants\n"
        "- 4x points at U.S. supermarkets (up to $25,000/year)\n"
        "- 3x points on flights\n"
        "- 1x points on everything else\n"
        "\n"
        "## Annual Fee\n"
        "$250 (offset by $240 in credits)\n"
        "\n"
        "## Best For\n"
        "Dining and grocery spending\n"
        "\n"
        "## Important Notes\n"
        "- Supercenters like Walmart and Target do NOT count as supermarkets\n"
        "- Must activate credits to receive full value\n"
        "- No foreign transaction fees\n"
    )

    # Longest context snippet get_card_context() returns before truncating.
    _MAX_CONTEXT_CHARS = 500

    def __init__(self, data_dir: str = "data/card_benefits", persist_dir: str = ".index_storage"):
        """
        Initialize LlamaIndex RAG

        Args:
            data_dir: Directory containing card benefit markdown files
            persist_dir: Directory to persist the index
        """
        # Define every attribute before any early return so that a disabled
        # instance is still a complete object.
        self.data_dir = Path(data_dir)
        self.persist_dir = Path(persist_dir)
        self.index = None
        self.query_engine = None

        api_key = os.getenv("OPENAI_API_KEY")
        self.enabled = LLAMAINDEX_AVAILABLE and bool(api_key)

        if not LLAMAINDEX_AVAILABLE:
            logger.warning("⚠️ LlamaIndex library not available")
            logger.warning("To enable RAG features:")
            logger.warning("1. Add 'llama-index' to requirements.txt")
            logger.warning("2. Run: pip install llama-index llama-index-embeddings-openai llama-index-llms-openai")
            return

        if not api_key:
            logger.warning("⚠️ OPENAI_API_KEY not found in environment")
            logger.warning("RAG features will be disabled")
            return

        try:
            # Configure LlamaIndex settings
            Settings.llm = OpenAI(
                model="gpt-4-turbo-preview",
                temperature=0.1,
                api_key=api_key,
            )
            Settings.embed_model = OpenAIEmbedding(
                model="text-embedding-3-small",
                api_key=api_key,
            )
            Settings.chunk_size = 512
            Settings.chunk_overlap = 50

            self._load_or_create_index()

            if self.index is None:
                # _create_index() has already logged why; do not go on to
                # dereference a missing index.
                self.enabled = False
                return

            self.query_engine = self.index.as_query_engine(
                similarity_top_k=3,
                response_mode="compact",
            )

            logger.info("✅ CardBenefitsRAG initialized successfully")

        except Exception as e:
            logger.error(f"❌ Failed to initialize CardBenefitsRAG: {e}", exc_info=True)
            self.enabled = False

    def _is_ready(self) -> bool:
        """Return True when the instance is enabled and has a query engine."""
        return bool(self.enabled) and self.query_engine is not None

    def _load_or_create_index(self):
        """Set ``self.index`` from ``persist_dir``, rebuilding it when needed."""
        if not self.persist_dir.exists():
            logger.info("📚 Creating new LlamaIndex from documents...")
            self._create_index()
            return

        logger.info("📚 Loading existing LlamaIndex from storage...")
        try:
            storage_context = StorageContext.from_defaults(persist_dir=str(self.persist_dir))
            self.index = load_index_from_storage(storage_context)
            logger.info("✅ Index loaded from storage")
        except Exception as e:
            # The directory exists but does not hold a loadable index.
            logger.warning(f"⚠️ Could not load index from storage: {e}")
            logger.info("Creating new index...")
            self._create_index()

    def _create_index(self):
        """Create index from documents.

        Seeds ``data_dir`` with a sample file when it is missing or holds no
        ``.md`` / ``.txt`` files. On failure ``self.index`` is left unset and
        ``self.enabled`` becomes False.
        """
        if not self.data_dir.exists():
            logger.warning(f"⚠️ Data directory not found: {self.data_dir}")
            logger.info("Creating data directory with sample file...")
            self.data_dir.mkdir(parents=True, exist_ok=True)
            self._create_sample_data()

        # Check if directory has any files
        has_documents = any(self.data_dir.glob("*.md")) or any(self.data_dir.glob("*.txt"))
        if not has_documents:
            logger.warning("⚠️ No markdown or text files found in data directory")
            logger.info("Creating sample file...")
            self._create_sample_data()

        # Load documents and build the index
        try:
            documents = SimpleDirectoryReader(
                str(self.data_dir),
                required_exts=[".md", ".txt"],
                recursive=False,
            ).load_data()

            logger.info(f"📄 Loaded {len(documents)} documents")

            if not documents:
                logger.error("❌ No documents loaded. Check data directory.")
                self.enabled = False
                return

            self.index = VectorStoreIndex.from_documents(
                documents,
                show_progress=True,
            )
        except Exception as e:
            logger.error(f"❌ Failed to create index: {e}", exc_info=True)
            self.enabled = False
            return

        # Persisting is an optimisation for the next start-up. A failure here
        # (e.g. a read-only filesystem) must not discard the in-memory index.
        try:
            self.persist_dir.mkdir(parents=True, exist_ok=True)
            self.index.storage_context.persist(persist_dir=str(self.persist_dir))
            logger.info(f"💾 Index persisted to {self.persist_dir}")
        except Exception as e:
            logger.warning(f"⚠️ Could not persist index (continuing in memory): {e}")

    def _create_sample_data(self):
        """Create sample card benefit file if none exist"""
        sample_file = self.data_dir / "sample_card.md"
        sample_file.write_text(self._SAMPLE_CARD_MD, encoding="utf-8")
        logger.info(f"📝 Created sample file: {sample_file}")

    def query_benefits(self, card_name: str, question: str) -> Optional[str]:
        """
        Query card benefits

        Args:
            card_name: Name of the card
            question: Question about the card

        Returns:
            Answer from RAG or None
        """
        if not self._is_ready():
            logger.warning("RAG query skipped (not enabled)")
            return None

        try:
            query = f"For {card_name}: {question}"
            logger.info(f"🔍 RAG Query: {query}")
            return str(self.query_engine.query(query))
        except Exception as e:
            logger.error(f"❌ Query failed: {e}", exc_info=True)
            return None

    def get_card_context(self, card_name: str, merchant: str, category: str) -> Optional[str]:
        """
        Get relevant context for a card recommendation

        Args:
            card_name: Recommended card
            merchant: Merchant name
            category: Spending category

        Returns:
            Relevant context (cut to 500 characters plus "..." when longer)
            or None
        """
        if not self._is_ready():
            return None

        try:
            query = (
                f"For {card_name} when shopping at {merchant} ({category} category):\n"
                f"1. What are the earning rates for {category} purchases?\n"
                f"2. Are there any spending caps or exclusions relevant to {merchant}?\n"
                "3. What are 2-3 key benefits or warnings for this type of purchase?\n"
                "\n"
                "Provide a concise summary in 2-3 sentences."
            )

            logger.info(f"🔍 Context Query: {card_name} at {merchant}")
            result = str(self.query_engine.query(query))

            # Clean up response
            if len(result) > self._MAX_CONTEXT_CHARS:
                result = result[:self._MAX_CONTEXT_CHARS] + "..."

            return result

        except Exception as e:
            logger.error(f"❌ Context retrieval failed: {e}", exc_info=True)
            return None

    def compare_cards(self, card1: str, card2: str, category: str) -> Optional[str]:
        """
        Compare two cards for a specific category

        Args:
            card1: First card name
            card2: Second card name
            category: Spending category

        Returns:
            Comparison or None
        """
        if not self._is_ready():
            return None

        try:
            query = f"Compare {card1} vs {card2} for {category} spending. Which is better and why? Provide a concise answer in 2-3 sentences."
            logger.info(f"🔍 Comparison: {card1} vs {card2} for {category}")
            return str(self.query_engine.query(query))
        except Exception as e:
            logger.error(f"❌ Comparison failed: {e}", exc_info=True)
            return None

    def get_spending_warnings(self, card_name: str, category: str, amount: float) -> Optional[str]:
        """
        Get warnings about spending caps or limitations

        Args:
            card_name: Card name
            category: Spending category
            amount: Transaction amount

        Returns:
            Warnings or None
        """
        if not self._is_ready():
            return None

        try:
            # Formatting stays inside the try: a non-numeric amount is reported
            # as a failed lookup rather than raised to the caller.
            amount_text = f"{amount:.2f}"
            query = f"For {card_name} and a ${amount_text} purchase in {category} category: Are there any spending caps, annual limits, or exclusions I should know about? Be specific and concise."
            logger.info(f"🔍 Warnings: {card_name} ${amount_text} in {category}")
            return str(self.query_engine.query(query))
        except Exception as e:
            logger.error(f"❌ Warning retrieval failed: {e}", exc_info=True)
            return None
287
+
288
+
289
# Module-wide shared instance; stays None until first requested.
_rag_instance = None


def get_card_benefits_rag() -> CardBenefitsRAG:
    """Return the shared CardBenefitsRAG, constructing it on the first call."""
    global _rag_instance
    if _rag_instance is not None:
        return _rag_instance
    _rag_instance = CardBenefitsRAG()
    return _rag_instance
298
+
299
+
300
# Explicit start-up hook. Nothing in this function runs at import time; the
# shared instance is built when this function or get_card_benefits_rag() is
# first called.
def initialize_rag() -> CardBenefitsRAG:
    """Initialize RAG system (call this at app startup).

    Returns:
        The shared CardBenefitsRAG instance. Check its ``enabled`` attribute
        to see whether queries can be served.
    """
    logger.info("🚀 Initializing LlamaIndex RAG...")
    rag = get_card_benefits_rag()
    if rag.enabled:
        logger.info("✅ RAG initialized and ready")
    else:
        logger.warning("⚠️ RAG not available")
    return rag