Christian Kniep committed on
Commit 5d3ee93 · 1 Parent(s): 2e18bf2

update to v2

entrypoint.sh CHANGED
@@ -1,8 +1,29 @@
 #!/bin/bash
-# Webapp entrypoint script - runs database migrations before starting Gunicorn
+# Webapp entrypoint script - restores from S3 and runs database migrations before starting Gunicorn

 set -e

+echo "[ENTRYPOINT] Checking for S3 restore..."
+
+# Restore database from S3 if enabled (Feature 015)
+python3 <<'RESTORE_PYEOF'
+import os
+import sys
+
+# Add src directory to Python path
+sys.path.insert(0, '/app')
+
+try:
+    from src.services.s3_restore import restore_on_startup
+
+    result = restore_on_startup()
+    print(f"[S3_RESTORE] Result: {result.value}")
+except Exception as e:
+    print(f"[S3_RESTORE] Failed (non-fatal): {e}")
+    # Continue startup even if restore fails
+
+RESTORE_PYEOF
+
 echo "[ENTRYPOINT] Running database migrations..."

 # Run Python migration script
@@ -42,6 +63,35 @@ if not table_exists:
 cursor.execute("PRAGMA table_info(contact_sessions)")
 columns = [row[1] for row in cursor.fetchall()]

+# Step 2a: Add contact_id column if missing (v2 API alignment)
+if 'contact_id' not in columns:
+    print("[MIGRATION] Applying migration: Add contact_id column")
+
+    try:
+        cursor.execute("ALTER TABLE contact_sessions ADD COLUMN contact_id VARCHAR(255) NOT NULL DEFAULT ''")
+        print("  ✓ Added contact_id column")
+    except Exception as e:
+        print(f"  ⚠ contact_id: {e}")
+
+    try:
+        cursor.execute("UPDATE contact_sessions SET contact_id = session_id WHERE contact_id = ''")
+        print(f"  ✓ Backfilled {cursor.rowcount} existing contact_id values")
+    except Exception as e:
+        print(f"  ⚠ backfill contact_id: {e}")
+
+    try:
+        cursor.execute("CREATE INDEX IF NOT EXISTS idx_contact_sessions_contact_id ON contact_sessions(contact_id)")
+        print("  ✓ Created index idx_contact_sessions_contact_id")
+    except Exception as e:
+        print(f"  ⚠ index: {e}")
+
+    conn.commit()
+    print("[MIGRATION] contact_id migration complete")
+
+    # Refresh columns list after contact_id migration
+    cursor.execute("PRAGMA table_info(contact_sessions)")
+    columns = [row[1] for row in cursor.fetchall()]
+
 if 'normalized_name' not in columns:
     print("[MIGRATION] Applying migration: Add producer-related columns")
 
migrations/003_add_contact_id.sql ADDED
@@ -0,0 +1,12 @@
+-- Migration: Add contact_id to contact_sessions table
+-- This aligns with the v2 API, where contact_id maps to project_id in the backend
+
+-- Add contact_id column (empty default; backfilled from session_id below)
+ALTER TABLE contact_sessions ADD COLUMN contact_id VARCHAR(255) NOT NULL DEFAULT '';
+
+-- Update existing records: set contact_id from session_id
+-- Format is typically {user_id}_{uuid}; we use the full session_id as contact_id
+UPDATE contact_sessions SET contact_id = session_id WHERE contact_id = '';
+
+-- Create index for contact_id lookups
+CREATE INDEX IF NOT EXISTS idx_contact_sessions_contact_id ON contact_sessions(contact_id);
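For reference, a minimal sketch of applying this migration by hand against the webapp's default database path (the container entrypoint above runs equivalent logic automatically; path and workflow here are assumptions):

    import sqlite3

    # Hypothetical one-off migration runner.
    conn = sqlite3.connect("data/contacts.db")  # default DATABASE_PATH used elsewhere in this repo
    with open("migrations/003_add_contact_id.sql") as f:
        conn.executescript(f.read())  # runs ALTER TABLE, UPDATE, CREATE INDEX in order
    conn.close()
    # NOTE: re-running raises "duplicate column name: contact_id";
    # the entrypoint guards this with a PRAGMA table_info() check instead.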
requirements.txt CHANGED
@@ -18,6 +18,9 @@ opentelemetry-instrumentation-requests==0.42b0
 # Database
 SQLAlchemy==2.0.23

+# S3 backup/restore
+boto3>=1.34.0
+
 # Configuration
 python-dotenv==1.0.0

src/__pycache__/app.cpython-311.pyc CHANGED
Binary files a/src/__pycache__/app.cpython-311.pyc and b/src/__pycache__/app.cpython-311.pyc differ
 
src/models/__init__.py CHANGED
@@ -26,6 +26,7 @@ class ContactSession:

     session_id: str  # Format: {user_id}_{UUID_v4}
     user_id: str
+    contact_id: str  # Contact identifier (maps to project_id in v2 backend)
     contact_name: str
     contact_description: Optional[str]
     is_reference: bool
src/models/__pycache__/__init__.cpython-311.pyc CHANGED
Binary files a/src/models/__pycache__/__init__.cpython-311.pyc and b/src/models/__pycache__/__init__.cpython-311.pyc differ
 
src/routes/__pycache__/auth.cpython-311.pyc CHANGED
Binary files a/src/routes/__pycache__/auth.cpython-311.pyc and b/src/routes/__pycache__/auth.cpython-311.pyc differ
 
src/routes/__pycache__/contacts.cpython-311.pyc CHANGED
Binary files a/src/routes/__pycache__/contacts.cpython-311.pyc and b/src/routes/__pycache__/contacts.cpython-311.pyc differ
 
src/routes/__pycache__/profile.cpython-311.pyc CHANGED
Binary files a/src/routes/__pycache__/profile.cpython-311.pyc and b/src/routes/__pycache__/profile.cpython-311.pyc differ
 
src/routes/__pycache__/settings.cpython-311.pyc CHANGED
Binary files a/src/routes/__pycache__/settings.cpython-311.pyc and b/src/routes/__pycache__/settings.cpython-311.pyc differ
 
src/routes/auth.py CHANGED
@@ -108,8 +108,10 @@ def callback():
         # New user - create backend session first, then store with returned session_id
         try:
             backend_response = backend_client.create_session(
-                title=f"{display_name}'s Profile",
-                user_id=user_id
+                user_id=user_id,
+                contact_id="user-profile",
+                description=f"{display_name}'s Profile",
+                is_reference=False
             )
             backend_session_id = backend_response.get("session_id")

src/routes/contacts.py CHANGED
@@ -137,9 +137,13 @@ def create_contact():

     try:
         # Phase 1: Create backend session in PostgreSQL
-        title = f"Contact: {contact_name}"
         logger.info(f"[CONTACT_CREATE] Phase 1: Creating backend session for user {user_id}, contact '{contact_name}'")
-        backend_response = backend_api.create_session(title=title, user_id=user_id)
+        backend_response = backend_api.create_session(
+            user_id=user_id,
+            contact_id=contact_name,  # Use contact name as contact_id initially
+            description=f"Contact: {contact_name}",
+            is_reference=False
+        )
         session_id = backend_response.get("session_id")

         if not session_id:
src/services/__pycache__/backend_client.cpython-311.pyc CHANGED
Binary files a/src/services/__pycache__/backend_client.cpython-311.pyc and b/src/services/__pycache__/backend_client.cpython-311.pyc differ
 
src/services/__pycache__/s3_backup.cpython-311.pyc ADDED
Binary file (22.4 kB)
 
src/services/__pycache__/s3_config.cpython-311.pyc ADDED
Binary file (9.99 kB)
 
src/services/__pycache__/s3_restore.cpython-311.pyc ADDED
Binary file (20.2 kB)
 
src/services/__pycache__/storage_service.cpython-311.pyc CHANGED
Binary files a/src/services/__pycache__/storage_service.cpython-311.pyc and b/src/services/__pycache__/storage_service.cpython-311.pyc differ
 
src/services/backend_client.py CHANGED
@@ -212,23 +212,35 @@ class BackendAPIClient:
     finally:
         self._track_latency(start_time)

-    def create_session(self, title: str, user_id: str) -> Dict[str, Any]:
+    def create_session(
+        self,
+        user_id: str,
+        contact_id: str,
+        description: str = "",
+        is_reference: bool = False
+    ) -> Dict[str, Any]:
         """
-        Create a new session (profile or contact).
+        Create a new session in the backend API (v2 format).

         Args:
-            title: Title for the session
-            user_id: User ID who owns the session
+            user_id: User ID (required for API authentication)
+            contact_id: Contact identifier (maps to project_id in v2)
+            description: Session description (optional)
+            is_reference: Whether this is a reference session

         Returns:
-            Created session dict with keys: session_id, title
+            Response dict with session_id, contact_id, description, org_id, project_id

         Raises:
             BackendAPIError: If API request fails
         """
         url = f"{self.base_url}/sessions"

-        payload = {"title": title}
+        payload = {
+            "contact_id": contact_id,
+            "description": description,
+            "is_reference": is_reference
+        }

         # Create span for backend call with OpenTelemetry
         tracer = trace.get_tracer(__name__)
@@ -242,7 +254,9 @@
             span.set_attribute("http.method", "POST")
             span.set_attribute("http.url", url)
             span.set_attribute("user_id", user_id)
-            span.set_attribute("title", title)
+            span.set_attribute("contact_id", contact_id)
+            span.set_attribute("description", description)
+            span.set_attribute("is_reference", is_reference)

             try:
                 response = requests.post(url, json=payload, headers=headers, timeout=self.timeout)
@@ -326,7 +340,16 @@
             user_id: User ID to list sessions for

         Returns:
-            List of session dicts
+            List of session dicts with fields:
+            - id: Session ID
+            - contact_id: Contact identifier (project_id in v2)
+            - title: Session title (legacy, same as description)
+            - description: Session description
+            - user_id: User ID
+            - created_at: Unix timestamp
+            - last_interaction: Unix timestamp
+            - message_count: Number of messages
+            - is_reference: Reference flag

         Raises:
             BackendAPIError: If API request fails
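
For context, a minimal usage sketch of the v2 call; only create_session's signature is defined above, so the client construction (base_url argument) and the values passed are assumptions:

    from src.services.backend_client import BackendAPIClient

    client = BackendAPIClient(base_url="http://backend:8000")  # hypothetical construction
    resp = client.create_session(
        user_id="u-123",             # hypothetical user
        contact_id="alice",          # maps to project_id in the v2 backend
        description="Contact: alice",
        is_reference=False,
    )
    session_id = resp.get("session_id")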
src/services/s3_backup.py ADDED
@@ -0,0 +1,434 @@
+"""
+S3 Backup Manager
+
+Handles automatic backup of SQLite database to S3-compatible storage.
+Provides debouncing to prevent excessive S3 uploads and background
+threading for non-blocking backup operations.
+"""
+
+import os
+import sqlite3
+import hashlib
+import threading
+import time
+import logging
+from datetime import datetime
+from typing import Optional
+from dataclasses import dataclass
+
+import boto3
+from botocore.exceptions import ClientError
+
+from ..utils import s3_logger
+from .s3_config import (
+    S3Config,
+    S3BackupError,
+    S3CredentialsError,
+    S3BucketNotFoundError,
+    S3ConnectionError,
+    DatabaseCorruptedError
+)
+
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class BackupMetadata:
+    """Metadata for a backup file in S3."""
+    s3_key: str
+    last_modified: datetime
+    size_bytes: int
+    checksum_sha256: Optional[str] = None
+
+
+class BackupManager:
+    """
+    Manages automatic backup of SQLite database to S3.
+
+    Features:
+    - Non-blocking backup requests with debouncing
+    - Background thread execution
+    - Retry logic with exponential backoff
+    - Checksum validation
+    - Graceful error handling
+    """
+
+    def __init__(self, config: S3Config, db_path: str):
+        """
+        Initialize the backup manager.
+
+        Args:
+            config: S3 configuration object
+            db_path: Absolute path to SQLite database file
+
+        Raises:
+            ValueError: If db_path does not exist or is not readable
+        """
+        if not os.path.exists(db_path):
+            raise ValueError(f"Database file does not exist: {db_path}")
+
+        if not os.access(db_path, os.R_OK):
+            raise ValueError(f"Database file is not readable: {db_path}")
+
+        self.config = config
+        self.db_path = db_path
+        self.last_backup_request = None
+        self.backup_lock = threading.Lock()
+        self._debounce_thread = None
+
+        if config.enabled:
+            self.s3_client = config.create_s3_client()
+            logger.info(f"BackupManager initialized for {db_path}")
+        else:
+            self.s3_client = None
+            logger.info("BackupManager initialized but S3 is disabled")
+
+    def request_backup(self) -> None:
+        """
+        Non-blocking method to request a database backup.
+
+        Uses debouncing to prevent excessive S3 uploads. Multiple requests
+        within the debounce period are collapsed into a single backup.
+
+        Side Effects:
+            - Starts background thread if not already running
+            - Updates last_backup_request timestamp
+        """
+        if not self.config.enabled:
+            s3_logger.backup_skip_reason('s3_not_enabled', 'backup_requested')
+            return
+
+        with self.backup_lock:
+            self.last_backup_request = time.time()
+
+            # Start debounce thread if not already running
+            if self._debounce_thread is None or not self._debounce_thread.is_alive():
+                self._debounce_thread = threading.Thread(
+                    target=self._debounced_backup,
+                    daemon=True
+                )
+                self._debounce_thread.start()
+                logger.debug("Started debounce thread for backup")
+
+    def _debounced_backup(self) -> None:
+        """
+        Wait for debounce period, then execute backup.
+
+        This runs in a background thread and waits for the debounce period
+        to ensure no more backup requests arrive before executing.
+        """
+        debounce_seconds = self.config.debounce_seconds
+
+        # Wait for debounce period
+        time.sleep(debounce_seconds)
+
+        # Check if another request came in during debounce
+        with self.backup_lock:
+            time_since_last_request = time.time() - self.last_backup_request
+            if time_since_last_request < debounce_seconds:
+                # Another request came in, let it handle the backup
+                logger.debug("Backup skipped - newer request pending")
+                return
+
+        # Execute the backup
+        self._execute_backup()
+
+    def execute_backup_now(self) -> bool:
+        """
+        Synchronous method to execute backup immediately, bypassing debounce.
+
+        Returns:
+            True if backup succeeded, False if it failed
+
+        Raises:
+            S3CredentialsError: Invalid S3 credentials
+            S3BucketNotFoundError: Bucket does not exist
+            DatabaseCorruptedError: Source database failed integrity check
+        """
+        if not self.config.enabled:
+            logger.warning("Backup requested but S3 is disabled")
+            return False
+
+        return self._execute_backup()
+
+    def _execute_backup(self) -> bool:
+        """
+        Execute the actual backup operation.
+
+        Process:
+        1. Check database integrity
+        2. Create hot backup using sqlite3.backup()
+        3. Calculate checksum
+        4. Upload to S3 with retries
+        5. Clean up temp files
+
+        Returns:
+            True if backup succeeded, False otherwise
+        """
+        start_time = time.time()
+        db_size = os.path.getsize(self.db_path)
+
+        s3_logger.backup_started(self.db_path, db_size)
+        logger.info(f"Starting backup of {self.db_path} ({db_size} bytes)")
+
+        temp_path = None
+
+        try:
+            # Validate source database integrity
+            if not self._validate_database(self.db_path):
+                raise DatabaseCorruptedError("Source database failed integrity check")
+
+            # Create backup using sqlite3.backup() API
+            temp_path = f"{self.db_path}.backup"
+            self._create_hot_backup(temp_path)
+
+            # Calculate checksum
+            checksum = self._calculate_checksum(temp_path)
+
+            # Upload to S3 with timestamp
+            timestamp = datetime.utcnow().strftime('%Y-%m-%d-%H-%M-%S')
+            s3_key = f"contacts-{timestamp}.db"
+
+            if not self._upload_to_s3(temp_path, s3_key, checksum):
+                return False
+
+            # Success
+            duration = time.time() - start_time
+            upload_size = os.path.getsize(temp_path)
+            s3_logger.backup_completed(duration, s3_key, upload_size)
+            logger.info(f"Backup completed successfully: {s3_key} ({duration:.2f}s)")
+
+            return True
+
+        except DatabaseCorruptedError as e:
+            s3_logger.backup_failed(str(e))
+            logger.error(f"Backup failed - database corrupted: {e}")
+            raise
+
+        except Exception as e:
+            s3_logger.backup_failed(str(e))
+            logger.error(f"Backup failed: {e}", exc_info=True)
+            return False
+
+        finally:
+            # Clean up temp file
+            if temp_path and os.path.exists(temp_path):
+                try:
+                    os.remove(temp_path)
+                    logger.debug(f"Cleaned up temp file: {temp_path}")
+                except OSError as e:
+                    logger.warning(f"Failed to clean up temp file: {e}")
+
+    def _create_hot_backup(self, dest_path: str) -> None:
+        """
+        Create a hot backup of the database using sqlite3.backup() API.
+
+        This method is safe to use while the database is being written to,
+        as sqlite3.backup() handles concurrent access properly.
+
+        Args:
+            dest_path: Path where backup should be created
+        """
+        logger.debug(f"Creating hot backup to {dest_path}")
+
+        # Connect to source database
+        source_conn = sqlite3.connect(self.db_path)
+
+        try:
+            # Create destination connection
+            dest_conn = sqlite3.connect(dest_path)
+
+            try:
+                # Execute hot backup
+                source_conn.backup(dest_conn)
+                logger.debug("Hot backup completed")
+            finally:
+                dest_conn.close()
+        finally:
+            source_conn.close()
+
+    def _calculate_checksum(self, file_path: str) -> str:
+        """
+        Calculate SHA-256 checksum of a file.
+
+        Args:
+            file_path: Path to file
+
+        Returns:
+            Hexadecimal SHA-256 checksum string
+        """
+        sha256_hash = hashlib.sha256()
+
+        with open(file_path, 'rb') as f:
+            # Read in chunks for memory efficiency
+            for chunk in iter(lambda: f.read(8192), b''):
+                sha256_hash.update(chunk)
+
+        checksum = sha256_hash.hexdigest()
+        logger.debug(f"Calculated checksum: {checksum}")
+        return checksum
+
+    def _upload_to_s3(
+        self,
+        file_path: str,
+        s3_key: str,
+        checksum: str,
+        max_retries: int = 3
+    ) -> bool:
+        """
+        Upload file to S3 with retry logic and exponential backoff.
+
+        Args:
+            file_path: Path to file to upload
+            s3_key: S3 object key
+            checksum: SHA-256 checksum to store in metadata
+            max_retries: Maximum number of retry attempts
+
+        Returns:
+            True if upload succeeded, False otherwise
+        """
+        file_size = os.path.getsize(file_path)
+
+        for attempt in range(max_retries):
+            try:
+                logger.debug(f"Uploading to S3: {s3_key} (attempt {attempt + 1}/{max_retries})")
+
+                # Upload with metadata
+                with open(file_path, 'rb') as f:
+                    self.s3_client.upload_fileobj(
+                        f,
+                        self.config.bucket,
+                        s3_key,
+                        ExtraArgs={
+                            'Metadata': {
+                                'sha256': checksum,
+                                'source_host': os.uname().nodename,
+                                'db_version': str(sqlite3.sqlite_version)
+                            }
+                        }
+                    )
+
+                logger.info(f"Upload successful: {s3_key} ({file_size} bytes)")
+                return True
+
+            except ClientError as e:
+                error_code = e.response['Error']['Code']
+
+                # Permanent errors - don't retry
+                if error_code in ['NoSuchBucket', 'AccessDenied', 'InvalidAccessKeyId']:
+                    s3_logger.backup_failed(error_code, attempt + 1, max_retries)
+                    logger.error(f"Permanent S3 error: {error_code}")
+
+                    if error_code == 'NoSuchBucket':
+                        raise S3BucketNotFoundError(f"Bucket not found: {self.config.bucket}")
+                    elif error_code in ['AccessDenied', 'InvalidAccessKeyId']:
+                        raise S3CredentialsError(f"Invalid credentials: {error_code}")
+
+                    return False
+
+                # Transient errors - retry with backoff
+                if attempt < max_retries - 1:
+                    backoff = 2 ** attempt  # 1s, 2s, 4s
+                    logger.warning(
+                        f"S3 upload failed (attempt {attempt + 1}/{max_retries}): {error_code}, "
+                        f"retrying in {backoff}s"
+                    )
+                    time.sleep(backoff)
+                else:
+                    s3_logger.backup_failed(error_code, attempt + 1, max_retries)
+                    logger.error(f"S3 upload failed after {max_retries} attempts: {error_code}")
+                    return False
+
+            except Exception as e:
+                s3_logger.backup_failed(str(e), attempt + 1, max_retries)
+                logger.error(f"Unexpected error during S3 upload: {e}", exc_info=True)
+                return False
+
+        return False
+
+    def _validate_database(self, db_path: str) -> bool:
+        """
+        Validate SQLite database integrity.
+
+        Args:
+            db_path: Path to database file
+
+        Returns:
+            True if database passes integrity check, False otherwise
+        """
+        try:
+            conn = sqlite3.connect(db_path)
+            cursor = conn.cursor()
+            cursor.execute("PRAGMA integrity_check")
+            result = cursor.fetchone()[0]
+            conn.close()
+
+            if result == 'ok':
+                logger.debug(f"Database integrity check passed: {db_path}")
+                return True
+            else:
+                logger.error(f"Database integrity check failed: {result}")
+                return False
+
+        except Exception as e:
+            logger.error(f"Database validation failed: {e}")
+            return False
+
+    def get_latest_backup(self) -> Optional[BackupMetadata]:
+        """
+        Query S3 for the latest backup file.
+
+        Returns:
+            BackupMetadata object with latest backup info, or None if no backups found
+
+        Raises:
+            S3ConnectionError: Network or S3 service error
+        """
+        if not self.config.enabled:
+            return None
+
+        try:
+            logger.debug(f"Querying S3 for latest backup in bucket: {self.config.bucket}")
+
+            response = self.s3_client.list_objects_v2(
+                Bucket=self.config.bucket,
+                Prefix='contacts-'
+            )
+
+            if 'Contents' not in response or len(response['Contents']) == 0:
+                logger.info("No backups found in S3")
+                return None
+
+            # Find latest by LastModified
+            latest = max(response['Contents'], key=lambda x: x['LastModified'])
+
+            # Get metadata if available
+            try:
+                head_response = self.s3_client.head_object(
+                    Bucket=self.config.bucket,
+                    Key=latest['Key']
+                )
+                checksum = head_response.get('Metadata', {}).get('sha256')
+            except Exception as e:
+                logger.warning(f"Failed to get metadata for {latest['Key']}: {e}")
+                checksum = None
+
+            metadata = BackupMetadata(
+                s3_key=latest['Key'],
+                last_modified=latest['LastModified'],
+                size_bytes=latest['Size'],
+                checksum_sha256=checksum
+            )
+
+            logger.info(f"Latest backup: {metadata.s3_key} ({metadata.last_modified})")
+            return metadata
+
+        except ClientError as e:
+            error_code = e.response['Error']['Code']
+            logger.error(f"S3 error querying backups: {error_code}")
+            raise S3ConnectionError(f"S3 error: {error_code}") from e
+
+        except Exception as e:
+            logger.error(f"Unexpected error querying backups: {e}")
+            raise S3ConnectionError(f"Error querying backups: {e}") from e
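
A short usage sketch of the manager as defined above (the database path is an assumption; it matches the repo's default DATABASE_PATH):

    from src.services.s3_config import S3Config
    from src.services.s3_backup import BackupManager

    config = S3Config.from_env()                         # reads the S3_* env vars
    manager = BackupManager(config, "data/contacts.db")  # assumed db location
    manager.request_backup()       # non-blocking; collapsed by the debounce window
    manager.execute_backup_now()   # synchronous; bypasses debouncing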
src/services/s3_config.py ADDED
@@ -0,0 +1,226 @@
+"""
+S3 Configuration Module
+
+Manages S3 backup/restore configuration loaded from environment variables.
+Provides validation and credential checking for S3-compatible storage
+(AWS S3, MinIO, DigitalOcean Spaces, etc.).
+"""
+
+import os
+import logging
+from dataclasses import dataclass
+from typing import Optional
+
+import boto3
+from botocore.exceptions import ClientError
+
+
+logger = logging.getLogger(__name__)
+
+
+# Custom Exceptions
+class S3BackupError(Exception):
+    """Base exception for S3 backup operations"""
+    pass
+
+
+class S3CredentialsError(S3BackupError):
+    """Invalid S3 credentials"""
+    pass
+
+
+class S3BucketNotFoundError(S3BackupError):
+    """S3 bucket does not exist"""
+    pass
+
+
+class DatabaseCorruptedError(S3BackupError):
+    """SQLite database failed integrity check"""
+    pass
+
+
+class S3ConnectionError(S3BackupError):
+    """Network error connecting to S3"""
+    pass
+
+
+class RestoreError(S3BackupError):
+    """Critical error during restore that prevents startup"""
+    pass
+
+
+@dataclass
+class S3Config:
+    """
+    S3 configuration for backup/restore operations.
+
+    Attributes:
+        enabled: Whether S3 backup/restore is enabled
+        bucket: S3 bucket name
+        access_key: AWS access key ID
+        secret_key: AWS secret access key
+        region: AWS region (default: us-east-1)
+        endpoint_url: Custom S3 endpoint for MinIO/DigitalOcean Spaces
+        upload_timeout: Upload timeout in seconds
+        download_timeout: Download timeout in seconds
+        debounce_seconds: Debounce period for backup requests
+    """
+    enabled: bool
+    bucket: Optional[str] = None
+    access_key: Optional[str] = None
+    secret_key: Optional[str] = None
+    region: str = "us-east-1"
+    endpoint_url: Optional[str] = None
+    upload_timeout: int = 60
+    download_timeout: int = 30
+    debounce_seconds: int = 300
+
+    @staticmethod
+    def from_env() -> 'S3Config':
+        """
+        Factory method to create S3Config from environment variables.
+
+        Environment Variables:
+            S3_BACKUP_ENABLED: "true" or "false" (default: "false")
+            S3_BUCKET_NAME: S3 bucket name (required if enabled)
+            S3_ACCESS_KEY: AWS access key ID (required if enabled)
+            S3_SECRET_KEY: AWS secret access key (required if enabled)
+            S3_REGION: AWS region (default: "us-east-1")
+            S3_ENDPOINT_URL: Custom S3 endpoint (optional)
+            S3_UPLOAD_TIMEOUT: Upload timeout in seconds (default: 60)
+            S3_DOWNLOAD_TIMEOUT: Download timeout in seconds (default: 30)
+            S3_DEBOUNCE_SECONDS: Debounce period in seconds (default: 300)
+
+        Returns:
+            S3Config instance with enabled=False if configuration is incomplete
+        """
+        enabled = os.getenv('S3_BACKUP_ENABLED', 'false').lower() == 'true'
+
+        if not enabled:
+            logger.info("S3 backup/restore disabled (S3_BACKUP_ENABLED not set)")
+            return S3Config(enabled=False)
+
+        # Load required configuration
+        bucket = os.getenv('S3_BUCKET_NAME')
+        access_key = os.getenv('S3_ACCESS_KEY')
+        secret_key = os.getenv('S3_SECRET_KEY')
+
+        # Validate required fields
+        missing = []
+        if not bucket:
+            missing.append('S3_BUCKET_NAME')
+        if not access_key:
+            missing.append('S3_ACCESS_KEY')
+        if not secret_key:
+            missing.append('S3_SECRET_KEY')
+
+        if missing:
+            logger.warning(
+                f"S3 backup disabled - missing required configuration: {', '.join(missing)}"
+            )
+            return S3Config(enabled=False)
+
+        # Load optional configuration with defaults
+        region = os.getenv('S3_REGION', 'us-east-1')
+        endpoint_url = os.getenv('S3_ENDPOINT_URL')  # None for AWS S3
+
+        try:
+            upload_timeout = int(os.getenv('S3_UPLOAD_TIMEOUT', '60'))
+            download_timeout = int(os.getenv('S3_DOWNLOAD_TIMEOUT', '30'))
+            debounce_seconds = int(os.getenv('S3_DEBOUNCE_SECONDS', '300'))
+        except ValueError as e:
+            logger.warning(f"Invalid timeout configuration: {e}, using defaults")
+            upload_timeout = 60
+            download_timeout = 30
+            debounce_seconds = 300
+
+        config = S3Config(
+            enabled=True,
+            bucket=bucket,
+            access_key=access_key,
+            secret_key=secret_key,
+            region=region,
+            endpoint_url=endpoint_url,
+            upload_timeout=upload_timeout,
+            download_timeout=download_timeout,
+            debounce_seconds=debounce_seconds
+        )
+
+        logger.info(
+            f"S3 backup enabled - bucket: {bucket}, region: {region}, "
+            f"endpoint: {endpoint_url or 'AWS S3'}"
+        )
+
+        return config
+
+    def validate_credentials(self) -> bool:
+        """
+        Test S3 credentials by performing a HeadBucket operation.
+
+        Returns:
+            True if credentials are valid and bucket is accessible
+            False if credentials are invalid or bucket not found
+
+        Raises:
+            S3ConnectionError: Network or S3 service error
+        """
+        if not self.enabled:
+            return False
+
+        try:
+            s3_client = boto3.client(
+                's3',
+                endpoint_url=self.endpoint_url,
+                aws_access_key_id=self.access_key,
+                aws_secret_access_key=self.secret_key,
+                region_name=self.region
+            )
+
+            # HeadBucket validates both credentials and bucket existence
+            s3_client.head_bucket(Bucket=self.bucket)
+            logger.info(f"S3 credentials validated - bucket '{self.bucket}' is accessible")
+            return True
+
+        except ClientError as e:
+            error_code = e.response['Error']['Code']
+
+            if error_code == '404':
+                logger.error(f"S3 bucket not found: {self.bucket}")
+                return False
+            elif error_code == '403':
+                logger.error("S3 credentials invalid or insufficient permissions")
+                return False
+            else:
+                logger.error(f"S3 error during credential validation: {error_code}")
+                raise S3ConnectionError(f"S3 error: {error_code}") from e
+
+        except Exception as e:
+            logger.error(f"Unexpected error during S3 credential validation: {e}")
+            raise S3ConnectionError(f"S3 connection error: {e}") from e
+
+    def create_s3_client(self):
+        """
+        Create a boto3 S3 client with this configuration.
+
+        Returns:
+            boto3.client instance configured for S3
+        """
+        if not self.enabled:
+            raise S3CredentialsError("S3 backup is not enabled")
+
+        from botocore.config import Config
+
+        boto_config = Config(
+            connect_timeout=5,
+            read_timeout=self.download_timeout,
+            retries={'max_attempts': 0}  # We handle retries manually
+        )
+
+        return boto3.client(
+            's3',
+            endpoint_url=self.endpoint_url,
+            aws_access_key_id=self.access_key,
+            aws_secret_access_key=self.secret_key,
+            region_name=self.region,
+            config=boto_config
+        )
src/services/s3_restore.py ADDED
@@ -0,0 +1,406 @@
+"""
+S3 Restore Manager
+
+Handles automatic restore of SQLite database from S3-compatible storage
+during webapp startup. Provides validation and fallback logic to ensure
+reliable database restoration.
+"""
+
+import os
+import sqlite3
+import hashlib
+import logging
+from datetime import datetime
+from typing import Optional
+from enum import Enum
+
+import boto3
+from botocore.exceptions import ClientError
+
+from ..utils import s3_logger
+from .s3_config import (
+    S3Config,
+    S3ConnectionError,
+    RestoreError,
+    DatabaseCorruptedError
+)
+
+
+logger = logging.getLogger(__name__)
+
+
+class RestoreResult(Enum):
+    """Result of restore operation."""
+    RESTORED_FROM_S3 = "restored_from_s3"
+    LOCAL_NEWER = "local_newer"
+    NO_BACKUP_FOUND = "no_backup_found"
+    VALIDATION_FAILED = "validation_failed"
+    NETWORK_ERROR = "network_error"
+
+
+class RestoreManager:
+    """
+    Manages automatic restore of SQLite database from S3.
+
+    Features:
+    - Startup restore with validation
+    - Timestamp comparison (local vs S3)
+    - Checksum verification
+    - SQLite integrity check
+    - Atomic file replacement
+    - Fallback chain for reliability
+    """
+
+    def __init__(self, config: S3Config, db_path: str):
+        """
+        Initialize the restore manager.
+
+        Args:
+            config: S3 configuration object
+            db_path: Absolute path where database should be restored
+        """
+        self.config = config
+        self.db_path = db_path
+
+        if config.enabled:
+            self.s3_client = config.create_s3_client()
+            logger.info(f"RestoreManager initialized for {db_path}")
+        else:
+            self.s3_client = None
+            logger.info("RestoreManager initialized but S3 is disabled")
+
+    def restore_from_s3(self) -> RestoreResult:
+        """
+        Restore database from S3 with validation and fallback logic.
+
+        Process:
+        1. List backups in S3
+        2. Find latest by LastModified
+        3. Compare timestamps (local vs S3)
+        4. Download if S3 is newer
+        5. Validate checksum and integrity
+        6. Atomic replace local file
+
+        Returns:
+            RestoreResult enum indicating the outcome
+
+        Side Effects:
+            - May replace local database file atomically
+            - Creates temp files during download (cleaned up automatically)
+            - Logs all operations with structured logging
+        """
+        start_time = datetime.now()
+        s3_logger.restore_started()
+
+        if not self.config.enabled:
+            duration = (datetime.now() - start_time).total_seconds()
+            s3_logger.restore_completed(duration, None, None, RestoreResult.NO_BACKUP_FOUND.value)
+            logger.info("Restore skipped - S3 disabled")
+            return RestoreResult.NO_BACKUP_FOUND
+
+        try:
+            # List backups from S3
+            backups = self._list_backups()
+
+            if not backups or len(backups) == 0:
+                duration = (datetime.now() - start_time).total_seconds()
+                s3_logger.restore_completed(duration, None, None, RestoreResult.NO_BACKUP_FOUND.value)
+                logger.info("No backups found in S3")
+                return RestoreResult.NO_BACKUP_FOUND
+
+            # Find latest backup
+            latest_backup = max(backups, key=lambda x: x['LastModified'])
+            s3_key = latest_backup['Key']
+            s3_timestamp = latest_backup['LastModified']
+
+            logger.info(f"Latest S3 backup: {s3_key} ({s3_timestamp})")
+
+            # Compare with local file timestamp
+            if os.path.exists(self.db_path):
+                local_mtime = datetime.fromtimestamp(os.path.getmtime(self.db_path))
+                local_mtime = local_mtime.replace(tzinfo=s3_timestamp.tzinfo)  # Make timezone-aware
+
+                if local_mtime >= s3_timestamp:
+                    duration = (datetime.now() - start_time).total_seconds()
+                    s3_logger.restore_completed(duration, s3_key, None, RestoreResult.LOCAL_NEWER.value)
+                    logger.info(f"Local database is newer ({local_mtime} >= {s3_timestamp}), skipping restore")
+                    return RestoreResult.LOCAL_NEWER
+
+            # Download from S3
+            temp_path = f"{self.db_path}.restore"
+            download_size = self._download_backup(s3_key, temp_path)
+
+            # Validate backup
+            if not self.validate_backup(temp_path, s3_key):
+                # Validation failed - use local fallback
+                if os.path.exists(temp_path):
+                    os.remove(temp_path)
+
+                duration = (datetime.now() - start_time).total_seconds()
+                s3_logger.restore_fallback("validation_failed", "using_local_database")
+                s3_logger.restore_completed(duration, s3_key, None, RestoreResult.VALIDATION_FAILED.value)
+                logger.warning("Backup validation failed, using local database")
+                return RestoreResult.VALIDATION_FAILED
+
+            # Atomic replace
+            self._atomic_replace(temp_path)
+
+            duration = (datetime.now() - start_time).total_seconds()
+            s3_logger.restore_completed(duration, s3_key, download_size, RestoreResult.RESTORED_FROM_S3.value)
+            logger.info(f"Restore completed successfully from {s3_key} ({duration:.2f}s)")
+
+            return RestoreResult.RESTORED_FROM_S3
+
+        except S3ConnectionError as e:
+            duration = (datetime.now() - start_time).total_seconds()
+            s3_logger.restore_fallback("network_error", "using_local_database")
+            s3_logger.restore_completed(duration, None, None, RestoreResult.NETWORK_ERROR.value)
+            logger.error(f"Network error during restore: {e}")
+            return RestoreResult.NETWORK_ERROR
+
+        except Exception as e:
+            duration = (datetime.now() - start_time).total_seconds()
+            s3_logger.restore_fallback(str(e), "using_local_database")
+            s3_logger.restore_completed(duration, None, None, RestoreResult.NETWORK_ERROR.value)
+            logger.error(f"Unexpected error during restore: {e}", exc_info=True)
+            return RestoreResult.NETWORK_ERROR
+
+    def _list_backups(self) -> list:
+        """
+        List all backup files in S3.
+
+        Returns:
+            List of S3 object dictionaries with Key, LastModified, Size
+
+        Raises:
+            S3ConnectionError: Network or S3 service error
+        """
+        try:
+            logger.debug(f"Listing backups in bucket: {self.config.bucket}")
+
+            response = self.s3_client.list_objects_v2(
+                Bucket=self.config.bucket,
+                Prefix='contacts-'
+            )
+
+            if 'Contents' not in response:
+                return []
+
+            return response['Contents']
+
+        except ClientError as e:
+            error_code = e.response['Error']['Code']
+            logger.error(f"S3 error listing backups: {error_code}")
+            raise S3ConnectionError(f"S3 error: {error_code}") from e
+
+        except Exception as e:
+            logger.error(f"Unexpected error listing backups: {e}")
+            raise S3ConnectionError(f"Error listing backups: {e}") from e
+
+    def _compare_timestamps(self, local_path: str, s3_timestamp: datetime) -> bool:
+        """
+        Compare local file timestamp with S3 backup timestamp.
+
+        Args:
+            local_path: Path to local database file
+            s3_timestamp: LastModified timestamp from S3
+
+        Returns:
+            True if S3 backup is newer, False if local is newer or equal
+        """
+        if not os.path.exists(local_path):
+            return True  # No local file, S3 is "newer"
+
+        local_mtime = datetime.fromtimestamp(os.path.getmtime(local_path))
+        local_mtime = local_mtime.replace(tzinfo=s3_timestamp.tzinfo)
+
+        logger.debug(f"Timestamp comparison - Local: {local_mtime}, S3: {s3_timestamp}")
+        return s3_timestamp > local_mtime
+
+    def _download_backup(self, s3_key: str, dest_path: str) -> int:
+        """
+        Download backup file from S3.
+
+        Args:
+            s3_key: S3 object key
+            dest_path: Local path where file should be saved
+
+        Returns:
+            Size of downloaded file in bytes
+
+        Raises:
+            S3ConnectionError: Network or S3 service error
+        """
+        try:
+            logger.info(f"Downloading backup from S3: {s3_key}")
+
+            self.s3_client.download_file(
+                self.config.bucket,
+                s3_key,
+                dest_path
+            )
+
+            file_size = os.path.getsize(dest_path)
+            logger.info(f"Download completed: {file_size} bytes")
+            return file_size
+
+        except ClientError as e:
+            error_code = e.response['Error']['Code']
+            logger.error(f"S3 error downloading backup: {error_code}")
+            raise S3ConnectionError(f"S3 error: {error_code}") from e
+
+        except Exception as e:
+            logger.error(f"Unexpected error downloading backup: {e}")
+            raise S3ConnectionError(f"Error downloading backup: {e}") from e
+
+    def _validate_checksum(self, file_path: str, s3_key: str) -> bool:
+        """
+        Validate file checksum against S3 metadata.
+
+        Args:
+            file_path: Path to downloaded file
+            s3_key: S3 object key
+
+        Returns:
+            True if checksum matches or no checksum in metadata, False if mismatch
+        """
+        try:
+            # Get S3 object metadata
+            response = self.s3_client.head_object(
+                Bucket=self.config.bucket,
+                Key=s3_key
+            )
+
+            remote_checksum = response.get('Metadata', {}).get('sha256')
+
+            if not remote_checksum:
+                logger.debug("No checksum in S3 metadata, skipping validation")
+                return True
+
+            # Calculate local checksum
+            sha256_hash = hashlib.sha256()
+            with open(file_path, 'rb') as f:
+                for chunk in iter(lambda: f.read(8192), b''):
+                    sha256_hash.update(chunk)
+
+            local_checksum = sha256_hash.hexdigest()
+
+            if local_checksum == remote_checksum:
+                logger.debug("Checksum validation passed")
+                return True
+            else:
+                logger.error(f"Checksum mismatch - Local: {local_checksum}, Remote: {remote_checksum}")
+                return False
+
+        except Exception as e:
+            logger.warning(f"Checksum validation failed: {e}")
+            return False
+
+    def _validate_sqlite_integrity(self, file_path: str) -> bool:
+        """
+        Validate SQLite database integrity using PRAGMA integrity_check.
+
+        Args:
+            file_path: Path to database file
+
+        Returns:
+            True if database passes integrity check, False otherwise
+        """
+        try:
+            conn = sqlite3.connect(file_path)
+            cursor = conn.cursor()
+            cursor.execute("PRAGMA integrity_check")
+            result = cursor.fetchone()[0]
+            conn.close()
+
+            if result == 'ok':
+                logger.debug("SQLite integrity check passed")
+                return True
+            else:
+                logger.error(f"SQLite integrity check failed: {result}")
+                return False
+
+        except Exception as e:
+            logger.error(f"SQLite integrity check failed: {e}")
+            return False
+
+    def validate_backup(self, file_path: str, s3_key: Optional[str] = None) -> bool:
+        """
+        Validate a backup file's integrity.
+
+        Performs both checksum validation (if S3 metadata available) and
+        SQLite integrity check.
+
+        Args:
+            file_path: Path to backup file to validate
+            s3_key: Optional S3 key for checksum validation
+
+        Returns:
+            True if file is valid, False otherwise
+        """
+        logger.info(f"Validating backup: {file_path}")
+
+        # Validate checksum if S3 key provided
+        if s3_key and not self._validate_checksum(file_path, s3_key):
+            return False
+
+        # Validate SQLite integrity
+        if not self._validate_sqlite_integrity(file_path):
+            return False
+
+        logger.info("Backup validation passed")
+        return True
+
+    def _atomic_replace(self, temp_path: str) -> None:
+        """
+        Atomically replace local database file with validated backup.
+
+        Uses os.replace() which is atomic on POSIX systems.
+
+        Args:
+            temp_path: Path to validated backup file
+
+        Raises:
+            OSError: If atomic replace fails
+        """
+        try:
+            logger.info(f"Replacing database: {self.db_path}")
+
+            # Create backup directory if it doesn't exist
+            os.makedirs(os.path.dirname(self.db_path), exist_ok=True)
+
+            # Atomic replace
+            os.replace(temp_path, self.db_path)
+
+            logger.info("Database replaced successfully")
+
+        except OSError as e:
+            logger.error(f"Failed to replace database: {e}")
+            raise
+
+
+def restore_on_startup() -> RestoreResult:
+    """
+    Convenience function to restore database on webapp startup.
+
+    This function is called from entrypoint.sh before Flask starts.
+
+    Returns:
+        RestoreResult enum indicating the outcome
+    """
+    try:
+        from .s3_config import S3Config
+
+        config = S3Config.from_env()
+        db_path = os.getenv("DATABASE_PATH", "/app/data/contacts.db")
+
+        restore_manager = RestoreManager(config, db_path)
+        result = restore_manager.restore_from_s3()
+
+        logger.info(f"Startup restore completed: {result.value}")
+        return result
+
+    except Exception as e:
+        logger.error(f"Startup restore failed: {e}", exc_info=True)
+        s3_logger.restore_fallback(str(e), "using_local_or_empty_database")
+        return RestoreResult.NETWORK_ERROR
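
For context, a sketch of how a caller can branch on the result values defined above (this mirrors what the entrypoint heredoc does; the print messages are illustrative only):

    from src.services.s3_restore import restore_on_startup, RestoreResult

    result = restore_on_startup()
    if result == RestoreResult.RESTORED_FROM_S3:
        print("Local database replaced with the latest S3 backup")
    elif result in (RestoreResult.LOCAL_NEWER, RestoreResult.NO_BACKUP_FOUND):
        print("Keeping the existing local database")
    else:
        # VALIDATION_FAILED / NETWORK_ERROR: restore falls back to the local file
        print("Restore skipped; continuing with the local database")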
src/services/storage_service.py CHANGED
@@ -2,6 +2,20 @@
2
  SQLite storage service for user profiles and contact sessions.
3
  Feature: 012-profile-contact-ui
4
  Feature: 001-refine-memory-producer-logic (producer_id generation)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  """
6
 
7
  import os
@@ -20,6 +34,37 @@ class NotFoundError(Exception):
20
  pass
21
 
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  def get_db_connection() -> sqlite3.Connection:
24
  """Get SQLite database connection."""
25
  db_path = os.getenv("DATABASE_PATH", "data/contacts.db")
@@ -136,6 +181,11 @@ def create_or_update_user(
136
  # Fetch created profile
137
  cursor.execute("SELECT * FROM user_profiles WHERE user_id = ?", (user_id,))
138
  row = cursor.fetchone()
 
 
 
 
 
139
 
140
  conn.close()
141
 
@@ -235,13 +285,14 @@ def create_contact_session(
235
  cursor.execute(
236
  """
237
  INSERT INTO contact_sessions
238
- (session_id, user_id, contact_name, contact_description, is_reference,
239
  created_at, last_interaction, normalized_name, sequence_number, producer_id)
240
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
241
  """,
242
  (
243
  session_id,
244
  user_id,
 
245
  contact_name,
246
  contact_description,
247
  is_reference,
@@ -262,6 +313,7 @@ def create_contact_session(
262
  return ContactSession(
263
  session_id=row["session_id"],
264
  user_id=row["user_id"],
 
265
  contact_name=row["contact_name"],
266
  contact_description=row["contact_description"],
267
  is_reference=bool(row["is_reference"]),
@@ -343,13 +395,14 @@ def create_contact_session_with_id(
343
  cursor.execute(
344
  """
345
  INSERT INTO contact_sessions
346
- (session_id, user_id, contact_name, contact_description, is_reference,
347
  created_at, last_interaction, normalized_name, sequence_number, producer_id)
348
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
349
  """,
350
  (
351
  session_id,
352
  user_id,
 
353
  contact_name,
354
  contact_description,
355
  is_reference,
@@ -366,10 +419,16 @@ def create_contact_session_with_id(
366
  cursor.execute("SELECT * FROM contact_sessions WHERE session_id = ?", (session_id,))
367
  row = cursor.fetchone()
368
  conn.close()
 
 
 
 
 
369
 
370
  return ContactSession(
371
  session_id=row["session_id"],
372
  user_id=row["user_id"],
 
373
  contact_name=row["contact_name"],
374
  contact_description=row["contact_description"],
375
  is_reference=bool(row["is_reference"]),
@@ -438,6 +497,7 @@ def list_contact_sessions(
438
  ContactSession(
439
  session_id=row["session_id"],
440
  user_id=row["user_id"],
 
441
  contact_name=row["contact_name"],
442
  contact_description=row["contact_description"],
443
  is_reference=bool(row["is_reference"]),
@@ -483,6 +543,7 @@ def get_contact_session(session_id: str) -> Optional[ContactSession]:
483
  return ContactSession(
484
  session_id=row["session_id"],
485
  user_id=row["user_id"],
 
486
  contact_name=row["contact_name"],
487
  contact_description=row["contact_description"],
488
  is_reference=bool(row["is_reference"]),
@@ -551,6 +612,12 @@ def update_contact_session(
551
  query = f"UPDATE contact_sessions SET {', '.join(updates)} WHERE session_id = ?"
552
  cursor.execute(query, params)
553
  conn.commit()
 
 
 
 
 
 
554
 
555
  # Fetch updated session
556
  cursor.execute("SELECT * FROM contact_sessions WHERE session_id = ?", (session_id,))
 
2
  SQLite storage service for user profiles and contact sessions.
3
  Feature: 012-profile-contact-ui
4
  Feature: 001-refine-memory-producer-logic (producer_id generation)
5
+ Feature: 015-sqlite-s3-backup (S3 backup triggers)
6
+
7
+ S3 Backup Trigger Policy:
8
+ - Backup IS triggered for:
9
+ - New user profile creation (create_or_update_user)
10
+ - New contact session creation (create_contact_session_with_id)
11
+ - Contact metadata changes (update_contact_session: name/description)
12
+
13
+ - Backup is NOT triggered for:
14
+ - User login/last_login updates (create_or_update_user on existing user)
15
+ - last_interaction timestamp updates (update_contact_last_interaction)
16
+ - Message count increments or other ephemeral data
17
+
18
+ This policy optimizes S3 costs by only backing up when critical data changes.
19
  """
20
 
21
  import os
 
34
  pass
35
 
36
 
37
+ # Global S3 backup manager (initialized on first use)
38
+ _backup_manager = None
39
+
40
+
41
+ def _get_backup_manager():
42
+ """
43
+ Lazy initialization of BackupManager.
44
+
45
+ Returns:
46
+ BackupManager instance or None if S3 is disabled
47
+ """
48
+ global _backup_manager
49
+
50
+ if _backup_manager is None:
51
+ try:
52
+ from .s3_config import S3Config
53
+ from .s3_backup import BackupManager
54
+
55
+ config = S3Config.from_env()
56
+ db_path = os.getenv("DATABASE_PATH", "data/contacts.db")
57
+ _backup_manager = BackupManager(config, db_path)
58
+ except Exception as e:
59
+ # Log but don't crash if backup initialization fails
60
+ import logging
61
+ logger = logging.getLogger(__name__)
62
+ logger.warning(f"Failed to initialize S3 backup: {e}")
63
+ _backup_manager = None
64
+
65
+ return _backup_manager
66
+
67
+
68
  def get_db_connection() -> sqlite3.Connection:
69
  """Get SQLite database connection."""
70
  db_path = os.getenv("DATABASE_PATH", "data/contacts.db")
 
181
  # Fetch created profile
182
  cursor.execute("SELECT * FROM user_profiles WHERE user_id = ?", (user_id,))
183
  row = cursor.fetchone()
184
+
185
+ # Trigger S3 backup for new user creation (Feature 015)
186
+ backup_manager = _get_backup_manager()
187
+ if backup_manager:
188
+ backup_manager.request_backup()
189
 
190
  conn.close()
191
 
 
285
  cursor.execute(
286
  """
287
  INSERT INTO contact_sessions
288
+ (session_id, user_id, contact_id, contact_name, contact_description, is_reference,
289
  created_at, last_interaction, normalized_name, sequence_number, producer_id)
290
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
291
  """,
292
  (
293
  session_id,
294
  user_id,
295
+ session_id, # contact_id defaults to session_id
296
  contact_name,
297
  contact_description,
298
  is_reference,
 
313
  return ContactSession(
314
  session_id=row["session_id"],
315
  user_id=row["user_id"],
316
+ contact_id=row.get("contact_id") or row["session_id"],
317
  contact_name=row["contact_name"],
318
  contact_description=row["contact_description"],
319
  is_reference=bool(row["is_reference"]),
 
395
  cursor.execute(
396
  """
397
  INSERT INTO contact_sessions
398
+ (session_id, user_id, contact_id, contact_name, contact_description, is_reference,
399
  created_at, last_interaction, normalized_name, sequence_number, producer_id)
400
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
401
  """,
402
  (
403
  session_id,
404
  user_id,
405
+ session_id, # contact_id defaults to session_id
406
  contact_name,
407
  contact_description,
408
  is_reference,
 
419
  cursor.execute("SELECT * FROM contact_sessions WHERE session_id = ?", (session_id,))
420
  row = cursor.fetchone()
421
  conn.close()
422
+
423
+ # Trigger S3 backup for new contact creation (Feature 015)
424
+ backup_manager = _get_backup_manager()
425
+ if backup_manager:
426
+ backup_manager.request_backup()
427
 
428
  return ContactSession(
429
  session_id=row["session_id"],
430
  user_id=row["user_id"],
431
+ contact_id=row["contact_id"] if row["contact_id"] else row["session_id"],
432
  contact_name=row["contact_name"],
433
  contact_description=row["contact_description"],
434
  is_reference=bool(row["is_reference"]),
 
497
  ContactSession(
498
  session_id=row["session_id"],
499
  user_id=row["user_id"],
500
+ contact_id=safe_row_access(row, "contact_id") or row["session_id"],
501
  contact_name=row["contact_name"],
502
  contact_description=row["contact_description"],
503
  is_reference=bool(row["is_reference"]),
 
543
  return ContactSession(
544
  session_id=row["session_id"],
545
  user_id=row["user_id"],
546
+ contact_id=row["contact_id"] if "contact_id" in columns else row["session_id"],
547
  contact_name=row["contact_name"],
548
  contact_description=row["contact_description"],
549
  is_reference=bool(row["is_reference"]),
 
612
  query = f"UPDATE contact_sessions SET {', '.join(updates)} WHERE session_id = ?"
613
  cursor.execute(query, params)
614
  conn.commit()
615
+
616
+ # Trigger S3 backup for critical metadata changes (Feature 015)
617
+ # Only when name or description changes (not for last_interaction updates)
618
+ backup_manager = _get_backup_manager()
619
+ if backup_manager:
620
+ backup_manager.request_backup()
621
 
622
  # Fetch updated session
623
  cursor.execute("SELECT * FROM contact_sessions WHERE session_id = ?", (session_id,))
src/utils/__pycache__/s3_logger.cpython-311.pyc ADDED
Binary file (6.91 kB)
 
src/utils/s3_logger.py ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ """
+ Structured JSON Logging for S3 Backup/Restore Operations
+
+ Provides consistent logging format for observability and debugging.
+ All logs are emitted as JSON for easy parsing by log aggregation systems.
+ """
+
+ import json
+ import logging
+ from datetime import datetime
+ from typing import Optional, Dict, Any
+
+
+ logger = logging.getLogger(__name__)
+
+
+ def _log_event(event: str, **kwargs) -> None:
+     """
+     Internal helper to emit structured JSON log messages.
+
+     Args:
+         event: Event name (e.g., "backup_started", "restore_completed")
+         **kwargs: Additional event-specific fields
+     """
+     log_data = {
+         'event': event,
+         'timestamp': datetime.utcnow().isoformat() + 'Z',
+         **kwargs
+     }
+
+     # Mask sensitive data
+     if 's3_key' in log_data:
+         # S3 keys are safe to log
+         pass
+     if 'error' in log_data and isinstance(log_data['error'], Exception):
+         # Convert exception to string
+         log_data['error'] = str(log_data['error'])
+
+     logger.info(json.dumps(log_data))
+
+
+ def backup_started(db_path: str, db_size_bytes: int) -> None:
+     """
+     Log that a backup operation has started.
+
+     Args:
+         db_path: Path to the database file being backed up
+         db_size_bytes: Size of the database in bytes
+     """
+     _log_event(
+         'backup_started',
+         db_path=db_path,
+         db_size_bytes=db_size_bytes
+     )
+
+
+ def backup_completed(
+     duration_seconds: float,
+     s3_key: str,
+     upload_size_bytes: int
+ ) -> None:
+     """
+     Log that a backup operation completed successfully.
+
+     Args:
+         duration_seconds: Time taken for the backup operation
+         s3_key: S3 object key where backup was stored
+         upload_size_bytes: Size of uploaded file in bytes
+     """
+     _log_event(
+         'backup_completed',
+         duration_seconds=round(duration_seconds, 2),
+         s3_key=s3_key,
+         upload_size_bytes=upload_size_bytes
+     )
+
+
+ def backup_failed(
+     error: str,
+     retry_attempt: Optional[int] = None,
+     max_retries: Optional[int] = None
+ ) -> None:
+     """
+     Log that a backup operation failed.
+
+     Args:
+         error: Error message or exception
+         retry_attempt: Current retry attempt number (if retrying)
+         max_retries: Maximum number of retries (if retrying)
+     """
+     log_data = {
+         'error': str(error)
+     }
+
+     if retry_attempt is not None:
+         log_data['retry_attempt'] = retry_attempt
+     if max_retries is not None:
+         log_data['max_retries'] = max_retries
+
+     _log_event('backup_failed', **log_data)
+
+
+ def restore_started() -> None:
+     """Log that a restore operation has started."""
+     _log_event('restore_started')
+
+
+ def restore_completed(
+     duration_seconds: float,
+     s3_key: Optional[str],
+     download_size_bytes: Optional[int],
+     result: str
+ ) -> None:
+     """
+     Log that a restore operation completed.
+
+     Args:
+         duration_seconds: Time taken for the restore operation
+         s3_key: S3 object key that was restored (None if no restore needed)
+         download_size_bytes: Size of downloaded file (None if no download)
+         result: Restore result enum value (e.g., "restored_from_s3", "local_newer")
+     """
+     log_data = {
+         'duration_seconds': round(duration_seconds, 2),
+         'result': result
+     }
+
+     if s3_key:
+         log_data['s3_key'] = s3_key
+     if download_size_bytes:
+         log_data['download_size_bytes'] = download_size_bytes
+
+     _log_event('restore_completed', **log_data)
+
+
+ def restore_fallback(reason: str, fallback_action: str) -> None:
+     """
+     Log that a restore operation fell back to an alternative.
+
+     Args:
+         reason: Reason for fallback (e.g., "validation_failed", "network_error")
+         fallback_action: Action taken (e.g., "using_local_database", "initializing_empty")
+     """
+     _log_event(
+         'restore_fallback',
+         reason=reason,
+         fallback_action=fallback_action
+     )
+
+
+ def backup_debounced(pending_requests: int, debounce_seconds: int) -> None:
+     """
+     Log that backup requests are being debounced.
+
+     Args:
+         pending_requests: Number of backup requests collapsed into one
+         debounce_seconds: Debounce period in seconds
+     """
+     _log_event(
+         'backup_debounced',
+         pending_requests=pending_requests,
+         debounce_seconds=debounce_seconds
+     )
+
+
+ def s3_credentials_validated(bucket: str, endpoint: Optional[str]) -> None:
+     """
+     Log that S3 credentials were successfully validated.
+
+     Args:
+         bucket: S3 bucket name
+         endpoint: S3 endpoint URL (None for AWS S3)
+     """
+     _log_event(
+         's3_credentials_validated',
+         bucket=bucket,
+         endpoint=endpoint or 'AWS S3'
+     )
+
+
+ def s3_credentials_invalid(error: str) -> None:
+     """
+     Log that S3 credentials validation failed.
+
+     Args:
+         error: Error message
+     """
+     _log_event(
+         's3_credentials_invalid',
+         error=str(error)
+     )
+
+
+ def backup_skip_reason(reason: str, operation: str) -> None:
+     """
+     Log why a backup was skipped.
+
+     Args:
+         reason: Reason for skipping (e.g., "s3_not_enabled", "non_critical_update")
+         operation: Operation that would have triggered backup
+     """
+     _log_event(
+         'backup_skip',
+         reason=reason,
+         operation=operation
+     )
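
For orientation, a usage sketch (not part of this commit) showing how a backup path might drive these helpers; the actual upload step is elided and the paths are placeholders:

    import os
    import time

    from src.utils import s3_logger

    db_path = '/tmp/contacts.db'  # placeholder; assumes the file exists
    s3_logger.backup_started(db_path, os.path.getsize(db_path))
    start = time.time()
    try:
        s3_key = 'contacts-2025-11-27-12-00-00.db'  # timestamped key format
        # ... perform the actual S3 upload here ...
        s3_logger.backup_completed(time.time() - start, s3_key, os.path.getsize(db_path))
    except Exception as exc:
        s3_logger.backup_failed(str(exc), retry_attempt=1, max_retries=3)

Each helper emits a single JSON line, e.g. {"event": "backup_completed", "timestamp": "2025-11-27T12:00:01Z", "duration_seconds": 0.42, ...}, so log aggregators can filter on the event field directly.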
tests/integration/test_s3_minio.py ADDED
@@ -0,0 +1,214 @@
+ """
+ Integration tests for S3 backup/restore with MinIO.
+
+ Feature: 015-sqlite-s3-backup
+
+ These tests require a running MinIO instance. They can be run with:
+     docker compose up -d minio
+     pytest webapp/tests/integration/test_s3_minio.py
+ """
+
+ import os
+ import time
+ import tempfile
+ import sqlite3
+ import pytest
+
+ from src.services.s3_config import S3Config
+ from src.services.s3_backup import BackupManager
+ from src.services.s3_restore import RestoreManager, RestoreResult
+
+
+ # Check if MinIO is available
+ MINIO_AVAILABLE = os.getenv('MINIO_TEST_ENABLED', 'false').lower() == 'true'
+
+ skip_if_no_minio = pytest.mark.skipif(
+     not MINIO_AVAILABLE,
+     reason="MinIO not available - set MINIO_TEST_ENABLED=true to run"
+ )
+
+
+ @pytest.fixture
+ def minio_config():
+     """Create S3Config for local MinIO instance."""
+     return S3Config(
+         enabled=True,
+         bucket='test-prepmate-backups',
+         access_key='minioadmin',
+         secret_key='minioadmin',
+         region='us-east-1',
+         endpoint_url='http://localhost:9000',
+         debounce_seconds=5  # Shorter for tests
+     )
+
+
+ @pytest.fixture
+ def test_db():
+     """Create a temporary test database."""
+     with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp:
+         db_path = tmp.name
+
+     # Initialize database
+     conn = sqlite3.connect(db_path)
+     conn.execute("CREATE TABLE contacts (id INTEGER PRIMARY KEY, name TEXT)")
+     conn.execute("INSERT INTO contacts VALUES (1, 'Alice')")
+     conn.execute("INSERT INTO contacts VALUES (2, 'Bob')")
+     conn.commit()
+     conn.close()
+
+     yield db_path
+
+     # Cleanup
+     if os.path.exists(db_path):
+         os.unlink(db_path)
+
+
+ @skip_if_no_minio
+ class TestMinIOIntegration:
+     """Integration tests with MinIO S3-compatible storage."""
+
+     def test_backup_restore_roundtrip(self, minio_config, test_db):
+         """Test complete backup and restore cycle."""
+         # Create backup manager and execute backup
+         backup_manager = BackupManager(minio_config, test_db)
+         success = backup_manager.execute_backup_now()
+
+         assert success is True
+
+         # Verify backup exists in S3
+         metadata = backup_manager.get_latest_backup()
+         assert metadata is not None
+         assert metadata.s3_key.startswith('contacts-')
+         assert metadata.size_bytes > 0
+
+         # Delete local database
+         os.unlink(test_db)
+         assert not os.path.exists(test_db)
+
+         # Restore from S3
+         restore_manager = RestoreManager(minio_config, test_db)
+         result = restore_manager.restore_from_s3()
+
+         assert result == RestoreResult.RESTORED_FROM_S3
+         assert os.path.exists(test_db)
+
+         # Verify restored data
+         conn = sqlite3.connect(test_db)
+         cursor = conn.cursor()
+         cursor.execute("SELECT * FROM contacts ORDER BY id")
+         rows = cursor.fetchall()
+         conn.close()
+
+         assert len(rows) == 2
+         assert rows[0] == (1, 'Alice')
+         assert rows[1] == (2, 'Bob')
+
+     def test_multiple_rapid_creates(self, minio_config, test_db):
+         """Test debouncing with 10 rapid backup requests."""
+         backup_manager = BackupManager(minio_config, test_db)
+
+         # Get initial backup count
+         initial_metadata = backup_manager.get_latest_backup()
+         initial_count = 0 if initial_metadata is None else 1
+
+         # Request 10 rapid backups
+         for i in range(10):
+             backup_manager.request_backup()
+             time.sleep(0.1)  # Small delay between requests
+
+         # Wait for debounce period + execution
+         time.sleep(minio_config.debounce_seconds + 5)
+
+         # Should have only 1-2 new backups due to debouncing
+         # (1 if all collapsed, 2 if one more request came during execution)
+         final_metadata = backup_manager.get_latest_backup()
+         # Just verify at least one backup succeeded
+         assert final_metadata is not None
+
+     def test_restore_on_webapp_startup(self, minio_config, test_db):
+         """Test restore during simulated webapp startup."""
+         # Create a backup
+         backup_manager = BackupManager(minio_config, test_db)
+         backup_manager.execute_backup_now()
+
+         # Modify local database
+         conn = sqlite3.connect(test_db)
+         conn.execute("INSERT INTO contacts VALUES (3, 'Charlie')")
+         conn.commit()
+         conn.close()
+
+         # Simulate startup restore (should keep newer local)
+         restore_manager = RestoreManager(minio_config, test_db)
+         result = restore_manager.restore_from_s3()
+
+         # Local is newer, should not restore
+         assert result == RestoreResult.LOCAL_NEWER
+
+         # Verify Charlie still exists
+         conn = sqlite3.connect(test_db)
+         cursor = conn.cursor()
+         cursor.execute("SELECT COUNT(*) FROM contacts")
+         count = cursor.fetchone()[0]
+         conn.close()
+
+         assert count == 3
+
+     def test_graceful_degradation(self, test_db):
+         """Test webapp continues when S3 is unavailable."""
+         # Create config with wrong endpoint
+         bad_config = S3Config(
+             enabled=True,
+             bucket='test-bucket',
+             access_key='test',
+             secret_key='test',
+             endpoint_url='http://nonexistent:9999'
+         )
+
+         # Backup should fail gracefully
+         backup_manager = BackupManager(bad_config, test_db)
+         success = backup_manager.execute_backup_now()
+
+         assert success is False  # Failed but didn't crash
+
+         # Restore should fall back gracefully
+         restore_manager = RestoreManager(bad_config, test_db)
+         result = restore_manager.restore_from_s3()
+
+         assert result == RestoreResult.NETWORK_ERROR
+
+     def test_performance_budgets(self, minio_config, test_db):
+         """Test backup/restore performance meets targets."""
+         # Backup performance: <10s for 50MB database
+         # (test DB is smaller, so should be much faster)
+         backup_manager = BackupManager(minio_config, test_db)
+
+         start = time.time()
+         success = backup_manager.execute_backup_now()
+         backup_duration = time.time() - start
+
+         assert success is True
+         assert backup_duration < 10, f"Backup took {backup_duration}s (target: <10s)"
+
+         # Restore performance: <15s
+         restore_manager = RestoreManager(minio_config, test_db)
+
+         start = time.time()
+         result = restore_manager.restore_from_s3()
+         restore_duration = time.time() - start
+
+         assert result in [RestoreResult.RESTORED_FROM_S3, RestoreResult.LOCAL_NEWER]
+         assert restore_duration < 15, f"Restore took {restore_duration}s (target: <15s)"
+
+
+ if __name__ == '__main__':
+     """
+     Run integration tests with MinIO.
+
+     Usage:
+         # Start MinIO
+         docker compose up -d minio
+
+         # Run tests
+         MINIO_TEST_ENABLED=true pytest webapp/tests/integration/test_s3_minio.py -v
+     """
+     pytest.main([__file__, '-v'])
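
The roundtrip test above pins down the observable contract of execute_backup_now(): a consistent snapshot lands in the bucket under a timestamped contacts-...db key, with a sha256 entry in the object metadata. A sketch of that happy path under those assumptions (snapshot_and_upload is a hypothetical helper; the use of SQLite's online-backup API is inferred from the tests, not confirmed by this commit):

    import hashlib
    import sqlite3
    import tempfile
    from datetime import datetime


    def snapshot_and_upload(s3_client, bucket: str, db_path: str) -> str:
        """Upload a consistent snapshot of db_path; return the new S3 key."""
        with tempfile.NamedTemporaryFile(suffix='.db') as snap:
            # The SQLite online-backup API yields a consistent copy even if
            # the webapp is writing concurrently.
            src = sqlite3.connect(db_path)
            dst = sqlite3.connect(snap.name)
            src.backup(dst)
            dst.close()
            src.close()

            # Timestamped key matching the tests: contacts-YYYY-MM-DD-HH-MM-SS.db
            key = datetime.utcnow().strftime('contacts-%Y-%m-%d-%H-%M-%S.db')

            # Record a sha256 checksum in the object metadata, which is what
            # head_object is mocked to return in the unit tests below.
            snap.seek(0)
            checksum = hashlib.sha256(snap.read()).hexdigest()

            snap.seek(0)
            s3_client.upload_fileobj(
                snap, bucket, key,
                ExtraArgs={'Metadata': {'sha256': checksum}}
            )
        return key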
tests/unit/test_s3_backup.py ADDED
@@ -0,0 +1,241 @@
+ """
+ Unit tests for BackupManager module.
+
+ Feature: 015-sqlite-s3-backup
+ """
+
+ import os
+ import time
+ import tempfile
+ import sqlite3
+ import pytest
+ from unittest.mock import patch, MagicMock, call
+ from datetime import datetime
+
+ from src.services.s3_config import S3Config
+ from src.services.s3_backup import BackupManager, BackupMetadata
+
+
+ class TestBackupManagerInit:
+     """Tests for BackupManager initialization."""
+
+     def test_backup_manager_init_enabled(self):
+         """Test BackupManager initialization with S3 enabled."""
+         config = S3Config(
+             enabled=True,
+             bucket='test-bucket',
+             access_key='test-key',
+             secret_key='test-secret'
+         )
+
+         # Create temp database for testing
+         with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp:
+             db_path = tmp.name
+
+         try:
+             # Initialize database
+             conn = sqlite3.connect(db_path)
+             conn.execute("CREATE TABLE test (id INTEGER)")
+             conn.close()
+
+             # Create manager
+             with patch.object(config, 'create_s3_client'):
+                 manager = BackupManager(config, db_path)
+
+                 assert manager.config == config
+                 assert manager.db_path == db_path
+                 assert manager.last_backup_request is None
+         finally:
+             os.unlink(db_path)
+
+     def test_backup_manager_init_disabled(self):
+         """Test BackupManager initialization with S3 disabled."""
+         config = S3Config(enabled=False)
+
+         with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp:
+             db_path = tmp.name
+
+         try:
+             manager = BackupManager(config, db_path)
+
+             assert manager.config == config
+             assert manager.s3_client is None
+         finally:
+             os.unlink(db_path)
+
+     def test_backup_manager_init_missing_db(self):
+         """Test BackupManager initialization with non-existent database."""
+         config = S3Config(enabled=True)
+
+         with pytest.raises(ValueError, match="does not exist"):
+             BackupManager(config, "/nonexistent/path.db")
+
+
+ class TestBackupManagerRequestBackup:
+     """Tests for BackupManager.request_backup() method."""
+
+     @patch('threading.Thread')
+     def test_request_backup_debouncing(self, mock_thread):
+         """Test that multiple backup requests are debounced."""
+         config = S3Config(enabled=True, debounce_seconds=5)
+
+         with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp:
+             db_path = tmp.name
+
+         try:
+             conn = sqlite3.connect(db_path)
+             conn.execute("CREATE TABLE test (id INTEGER)")
+             conn.close()
+
+             with patch.object(config, 'create_s3_client'):
+                 manager = BackupManager(config, db_path)
+
+                 # Request multiple backups
+                 manager.request_backup()
+                 time1 = manager.last_backup_request
+
+                 time.sleep(0.1)
+
+                 manager.request_backup()
+                 time2 = manager.last_backup_request
+
+                 # Second request should update timestamp
+                 assert time2 > time1
+
+                 # Thread should be started
+                 assert mock_thread.called
+         finally:
+             os.unlink(db_path)
+
+
+ class TestBackupManagerExecuteBackup:
+     """Tests for BackupManager._execute_backup() method."""
+
+     @patch('boto3.client')
+     def test_execute_backup_success(self, mock_boto_client):
+         """Test successful backup execution."""
+         # Setup mocks
+         mock_s3 = MagicMock()
+         mock_boto_client.return_value = mock_s3
+         mock_s3.upload_fileobj.return_value = None
+
+         config = S3Config(
+             enabled=True,
+             bucket='test-bucket',
+             access_key='test-key',
+             secret_key='test-secret'
+         )
+
+         with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp:
+             db_path = tmp.name
+
+         try:
+             # Create test database
+             conn = sqlite3.connect(db_path)
+             conn.execute("CREATE TABLE test (id INTEGER)")
+             conn.execute("INSERT INTO test VALUES (1)")
+             conn.commit()
+             conn.close()
+
+             # Create manager and execute backup
+             with patch.object(config, 'create_s3_client', return_value=mock_s3):
+                 manager = BackupManager(config, db_path)
+                 result = manager.execute_backup_now()
+
+                 assert result is True
+                 # Verify S3 upload was called
+                 assert mock_s3.upload_fileobj.called
+         finally:
+             os.unlink(db_path)
+
+     @patch('boto3.client')
+     def test_execute_backup_s3_failure(self, mock_boto_client):
+         """Test backup with S3 upload failure."""
+         # Setup mocks
+         mock_s3 = MagicMock()
+         mock_boto_client.return_value = mock_s3
+
+         from botocore.exceptions import ClientError
+         mock_s3.upload_fileobj.side_effect = ClientError(
+             {'Error': {'Code': 'ServiceUnavailable'}},
+             'UploadFileobj'
+         )
+
+         config = S3Config(
+             enabled=True,
+             bucket='test-bucket',
+             access_key='test-key',
+             secret_key='test-secret'
+         )
+
+         with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp:
+             db_path = tmp.name
+
+         try:
+             conn = sqlite3.connect(db_path)
+             conn.execute("CREATE TABLE test (id INTEGER)")
+             conn.close()
+
+             with patch.object(config, 'create_s3_client', return_value=mock_s3):
+                 manager = BackupManager(config, db_path)
+                 result = manager.execute_backup_now()
+
+                 # Backup should fail gracefully
+                 assert result is False
+         finally:
+             os.unlink(db_path)
+
+
+ class TestBackupManagerGetLatestBackup:
+     """Tests for BackupManager.get_latest_backup() method."""
+
+     @patch('boto3.client')
+     def test_get_latest_backup_success(self, mock_boto_client):
+         """Test querying latest backup from S3."""
+         mock_s3 = MagicMock()
+         mock_boto_client.return_value = mock_s3
+
+         # Mock S3 response
+         mock_s3.list_objects_v2.return_value = {
+             'Contents': [
+                 {
+                     'Key': 'contacts-2025-11-27-10-00-00.db',
+                     'LastModified': datetime(2025, 11, 27, 10, 0, 0),
+                     'Size': 1048576
+                 },
+                 {
+                     'Key': 'contacts-2025-11-27-11-00-00.db',
+                     'LastModified': datetime(2025, 11, 27, 11, 0, 0),
+                     'Size': 1048576
+                 }
+             ]
+         }
+
+         mock_s3.head_object.return_value = {
+             'Metadata': {'sha256': 'abc123'}
+         }
+
+         config = S3Config(
+             enabled=True,
+             bucket='test-bucket',
+             access_key='test-key',
+             secret_key='test-secret'
+         )
+
+         with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp:
+             db_path = tmp.name
+
+         try:
+             conn = sqlite3.connect(db_path)
+             conn.execute("CREATE TABLE test (id INTEGER)")
+             conn.close()
+
+             with patch.object(config, 'create_s3_client', return_value=mock_s3):
+                 manager = BackupManager(config, db_path)
+                 metadata = manager.get_latest_backup()
+
+                 assert metadata is not None
+                 assert metadata.s3_key == 'contacts-2025-11-27-11-00-00.db'
+                 assert metadata.checksum_sha256 == 'abc123'
+         finally:
+             os.unlink(db_path)
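
test_request_backup_debouncing fixes the observable behaviour of request_backup(): each call records a request time and arms a background thread, so a burst of calls collapses into roughly one upload per quiet debounce window. A minimal sketch of that pattern (class and attribute names other than last_backup_request and debounce_seconds are assumptions):

    import threading
    import time


    class DebouncedBackup:
        """Sketch: collapse bursts of backup requests into one upload."""

        def __init__(self, debounce_seconds: int, do_backup):
            self.debounce_seconds = debounce_seconds
            self.do_backup = do_backup          # e.g. execute_backup_now
            self.last_backup_request = None
            self._lock = threading.Lock()
            self._worker = None

        def request_backup(self) -> None:
            with self._lock:
                self.last_backup_request = time.monotonic()
                if self._worker is None or not self._worker.is_alive():
                    self._worker = threading.Thread(target=self._run, daemon=True)
                    self._worker.start()

        def _run(self) -> None:
            # Wait until no new request has arrived for a full debounce
            # window, then back up once for the whole burst.
            while True:
                with self._lock:
                    quiet = time.monotonic() - self.last_backup_request
                if quiet >= self.debounce_seconds:
                    break
                time.sleep(self.debounce_seconds - quiet)
            self.do_backup()

Ten rapid request_backup() calls under this scheme produce a single upload, which is consistent with the integration test's "1-2 new backups" expectation.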
tests/unit/test_s3_config.py ADDED
@@ -0,0 +1,136 @@
+ """
+ Unit tests for S3Config module.
+
+ Feature: 015-sqlite-s3-backup
+ """
+
+ import os
+ import pytest
+ from unittest.mock import patch, MagicMock
+
+ from src.services.s3_config import (
+     S3Config,
+     S3CredentialsError,
+     S3BucketNotFoundError,
+     S3ConnectionError
+ )
+
+
+ class TestS3ConfigFromEnv:
+     """Tests for S3Config.from_env() factory method."""
+
+     @patch.dict(os.environ, {
+         'S3_BACKUP_ENABLED': 'true',
+         'S3_BUCKET_NAME': 'test-bucket',
+         'S3_ACCESS_KEY': 'test-access-key',
+         'S3_SECRET_KEY': 'test-secret-key',
+         'S3_REGION': 'us-west-2'
+     })
+     def test_from_env_valid_config(self):
+         """Test loading valid S3 configuration from environment."""
+         config = S3Config.from_env()
+
+         assert config.enabled is True
+         assert config.bucket == 'test-bucket'
+         assert config.access_key == 'test-access-key'
+         assert config.secret_key == 'test-secret-key'
+         assert config.region == 'us-west-2'
+         assert config.upload_timeout == 60
+         assert config.download_timeout == 30
+         assert config.debounce_seconds == 300
+
+     @patch.dict(os.environ, {}, clear=True)
+     def test_from_env_missing_config(self):
+         """Test that missing S3_BACKUP_ENABLED defaults to disabled."""
+         config = S3Config.from_env()
+
+         assert config.enabled is False
+
+     @patch.dict(os.environ, {
+         'S3_BACKUP_ENABLED': 'true',
+         'S3_BUCKET_NAME': 'test-bucket',
+         # Missing S3_ACCESS_KEY and S3_SECRET_KEY
+     }, clear=True)
+     def test_from_env_partial_config(self):
+         """Test that partial configuration disables the feature with warning."""
+         config = S3Config.from_env()
+
+         assert config.enabled is False
+
+
+ class TestS3ConfigValidateCredentials:
+     """Tests for S3Config.validate_credentials() method."""
+
+     @patch('boto3.client')
+     def test_validate_credentials_success(self, mock_boto_client):
+         """Test successful credential validation."""
+         # Setup mock
+         mock_s3 = MagicMock()
+         mock_boto_client.return_value = mock_s3
+         mock_s3.head_bucket.return_value = {}
+
+         # Create config
+         config = S3Config(
+             enabled=True,
+             bucket='test-bucket',
+             access_key='test-key',
+             secret_key='test-secret'
+         )
+
+         # Validate
+         result = config.validate_credentials()
+
+         assert result is True
+         mock_s3.head_bucket.assert_called_once_with(Bucket='test-bucket')
+
+     @patch('boto3.client')
+     def test_validate_credentials_bucket_not_found(self, mock_boto_client):
+         """Test validation with non-existent bucket."""
+         # Setup mock
+         mock_s3 = MagicMock()
+         mock_boto_client.return_value = mock_s3
+
+         from botocore.exceptions import ClientError
+         mock_s3.head_bucket.side_effect = ClientError(
+             {'Error': {'Code': '404'}},
+             'HeadBucket'
+         )
+
+         # Create config
+         config = S3Config(
+             enabled=True,
+             bucket='nonexistent-bucket',
+             access_key='test-key',
+             secret_key='test-secret'
+         )
+
+         # Validate
+         result = config.validate_credentials()
+
+         assert result is False
+
+     @patch('boto3.client')
+     def test_validate_credentials_access_denied(self, mock_boto_client):
+         """Test validation with invalid credentials."""
+         # Setup mock
+         mock_s3 = MagicMock()
+         mock_boto_client.return_value = mock_s3
+
+         from botocore.exceptions import ClientError
+         mock_s3.head_bucket.side_effect = ClientError(
+             {'Error': {'Code': '403'}},
+             'HeadBucket'
+         )
+
+         # Create config
+         config = S3Config(
+             enabled=True,
+             bucket='test-bucket',
+             access_key='invalid-key',
+             secret_key='invalid-secret'
+         )
+
+         # Validate
+         result = config.validate_credentials()
+
+         assert result is False
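
These tests document the environment contract: S3_BACKUP_ENABLED gates the feature, bucket and both keys are mandatory once enabled, and the timeout/debounce fields fall back to defaults. A from_env() sketch consistent with the assertions above (S3_ENDPOINT_URL is an assumed variable name; the fields and defaults are taken from the tests):

    import os
    from dataclasses import dataclass
    from typing import Optional


    @dataclass
    class S3ConfigSketch:
        enabled: bool = False
        bucket: Optional[str] = None
        access_key: Optional[str] = None
        secret_key: Optional[str] = None
        region: str = 'us-east-1'
        endpoint_url: Optional[str] = None
        upload_timeout: int = 60
        download_timeout: int = 30
        debounce_seconds: int = 300

        @classmethod
        def from_env(cls) -> 'S3ConfigSketch':
            enabled = os.getenv('S3_BACKUP_ENABLED', 'false').lower() == 'true'
            bucket = os.getenv('S3_BUCKET_NAME')
            access_key = os.getenv('S3_ACCESS_KEY')
            secret_key = os.getenv('S3_SECRET_KEY')
            if enabled and not (bucket and access_key and secret_key):
                enabled = False  # partial config: warn and run with S3 disabled
            return cls(
                enabled=enabled,
                bucket=bucket,
                access_key=access_key,
                secret_key=secret_key,
                region=os.getenv('S3_REGION', 'us-east-1'),
                endpoint_url=os.getenv('S3_ENDPOINT_URL'),  # assumption
            )

Disabling rather than raising on partial configuration is what test_from_env_partial_config asserts, and it keeps a misconfigured deployment bootable.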
tests/unit/test_s3_restore.py ADDED
@@ -0,0 +1,205 @@
+ """
+ Unit tests for RestoreManager module.
+
+ Feature: 015-sqlite-s3-backup
+ """
+
+ import os
+ import tempfile
+ import sqlite3
+ import pytest
+ from unittest.mock import patch, MagicMock
+ from datetime import datetime, timezone
+
+ from src.services.s3_config import S3Config
+ from src.services.s3_restore import RestoreManager, RestoreResult
+
+
+ class TestRestoreManagerRestoreFromS3:
+     """Tests for RestoreManager.restore_from_s3() method."""
+
+     @patch('boto3.client')
+     def test_restore_from_s3_success(self, mock_boto_client):
+         """Test successful restore from S3."""
+         mock_s3 = MagicMock()
+         mock_boto_client.return_value = mock_s3
+
+         # Mock S3 responses
+         mock_s3.list_objects_v2.return_value = {
+             'Contents': [{
+                 'Key': 'contacts-2025-11-27-12-00-00.db',
+                 'LastModified': datetime(2025, 11, 27, 12, 0, 0, tzinfo=timezone.utc),
+                 'Size': 1048576
+             }]
+         }
+
+         mock_s3.head_object.return_value = {
+             'Metadata': {'sha256': 'abc123'}
+         }
+
+         # Create temp files
+         with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp:
+             db_path = tmp.name
+
+         try:
+             # Create a valid SQLite database for download
+             with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as backup_tmp:
+                 backup_path = backup_tmp.name
+
+             conn = sqlite3.connect(backup_path)
+             conn.execute("CREATE TABLE test (id INTEGER)")
+             conn.execute("INSERT INTO test VALUES (1)")
+             conn.commit()
+             conn.close()
+
+             # Mock download to copy the backup file
+             def mock_download(bucket, key, dest):
+                 import shutil
+                 shutil.copy(backup_path, dest)
+
+             mock_s3.download_file.side_effect = mock_download
+
+             config = S3Config(
+                 enabled=True,
+                 bucket='test-bucket',
+                 access_key='test-key',
+                 secret_key='test-secret'
+             )
+
+             with patch.object(config, 'create_s3_client', return_value=mock_s3):
+                 manager = RestoreManager(config, db_path)
+                 result = manager.restore_from_s3()
+
+                 # Should restore successfully
+                 assert result == RestoreResult.RESTORED_FROM_S3
+                 assert os.path.exists(db_path)
+
+                 # Verify database was restored correctly
+                 conn = sqlite3.connect(db_path)
+                 cursor = conn.cursor()
+                 cursor.execute("SELECT * FROM test")
+                 rows = cursor.fetchall()
+                 conn.close()
+
+                 assert len(rows) == 1
+                 assert rows[0][0] == 1
+
+             os.unlink(backup_path)
+         finally:
+             if os.path.exists(db_path):
+                 os.unlink(db_path)
+
+     @patch('boto3.client')
+     def test_restore_validation_failure(self, mock_boto_client):
+         """Test restore with corrupted backup file."""
+         mock_s3 = MagicMock()
+         mock_boto_client.return_value = mock_s3
+
+         mock_s3.list_objects_v2.return_value = {
+             'Contents': [{
+                 'Key': 'contacts-2025-11-27-12-00-00.db',
+                 'LastModified': datetime(2025, 11, 27, 12, 0, 0, tzinfo=timezone.utc),
+                 'Size': 1048576
+             }]
+         }
+
+         # Mock download to create invalid file
+         def mock_download(bucket, key, dest):
+             with open(dest, 'wb') as f:
+                 f.write(b'invalid sqlite data')
+
+         mock_s3.download_file.side_effect = mock_download
+
+         config = S3Config(enabled=True, bucket='test-bucket')
+
+         with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp:
+             db_path = tmp.name
+
+         try:
+             with patch.object(config, 'create_s3_client', return_value=mock_s3):
+                 manager = RestoreManager(config, db_path)
+                 result = manager.restore_from_s3()
+
+                 # Should fail validation and fall back
+                 assert result == RestoreResult.VALIDATION_FAILED
+         finally:
+             if os.path.exists(db_path):
+                 os.unlink(db_path)
+
+     @patch('boto3.client')
+     def test_restore_no_backup_found(self, mock_boto_client):
+         """Test restore when no backups exist in S3."""
+         mock_s3 = MagicMock()
+         mock_boto_client.return_value = mock_s3
+
+         # No backups in S3
+         mock_s3.list_objects_v2.return_value = {}
+
+         config = S3Config(enabled=True, bucket='test-bucket')
+
+         with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp:
+             db_path = tmp.name
+
+         try:
+             with patch.object(config, 'create_s3_client', return_value=mock_s3):
+                 manager = RestoreManager(config, db_path)
+                 result = manager.restore_from_s3()
+
+                 assert result == RestoreResult.NO_BACKUP_FOUND
+         finally:
+             if os.path.exists(db_path):
+                 os.unlink(db_path)
+
+
+ class TestRestoreManagerTimestampComparison:
+     """Tests for RestoreManager._compare_timestamps() method."""
+
+     def test_timestamp_comparison_s3_newer(self):
+         """Test timestamp comparison when S3 backup is newer."""
+         config = S3Config(enabled=True)
+
+         with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp:
+             db_path = tmp.name
+
+         try:
+             # Create local file
+             with open(db_path, 'w') as f:
+                 f.write('test')
+
+             # Set local mtime to past
+             old_time = datetime(2025, 11, 27, 10, 0, 0).timestamp()
+             os.utime(db_path, (old_time, old_time))
+
+             with patch.object(config, 'create_s3_client'):
+                 manager = RestoreManager(config, db_path)
+
+                 # S3 timestamp is newer
+                 s3_timestamp = datetime(2025, 11, 27, 12, 0, 0, tzinfo=timezone.utc)
+                 result = manager._compare_timestamps(db_path, s3_timestamp)
+
+                 assert result is True
+         finally:
+             os.unlink(db_path)
+
+     def test_timestamp_comparison_local_newer(self):
+         """Test timestamp comparison when local file is newer."""
+         config = S3Config(enabled=True)
+
+         with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp:
+             db_path = tmp.name
+
+         try:
+             # Create local file with current time
+             with open(db_path, 'w') as f:
+                 f.write('test')
+
+             with patch.object(config, 'create_s3_client'):
+                 manager = RestoreManager(config, db_path)
+
+                 # S3 timestamp is older
+                 s3_timestamp = datetime(2025, 11, 27, 10, 0, 0, tzinfo=timezone.utc)
+                 result = manager._compare_timestamps(db_path, s3_timestamp)
+
+                 assert result is False
+         finally:
+             os.unlink(db_path)
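
The two timestamp tests fix the semantics of _compare_timestamps(): given the local database path and an S3 LastModified value, it returns True exactly when the S3 copy is newer. A sketch matching that contract (the real method may add a tolerance or extra logging):

    import os
    from datetime import datetime, timezone


    def compare_timestamps(db_path: str, s3_timestamp: datetime) -> bool:
        """Return True when the S3 backup is newer than the local database."""
        # Normalize the local mtime to an aware UTC datetime so it can be
        # compared against the tz-aware LastModified from S3.
        local_mtime = datetime.fromtimestamp(os.path.getmtime(db_path), tz=timezone.utc)
        return s3_timestamp > local_mtime

This is the decision point behind RestoreResult.LOCAL_NEWER in the integration tests: when the local file wins, the restore is skipped and the local database is kept.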