Christian Kniep committed · Commit 5d3ee93
Parent(s): 2e18bf2

update to v2
Files changed:
- entrypoint.sh +51 -1
- migrations/003_add_contact_id.sql +12 -0
- requirements.txt +3 -0
- src/__pycache__/app.cpython-311.pyc +0 -0
- src/models/__init__.py +1 -0
- src/models/__pycache__/__init__.cpython-311.pyc +0 -0
- src/routes/__pycache__/auth.cpython-311.pyc +0 -0
- src/routes/__pycache__/contacts.cpython-311.pyc +0 -0
- src/routes/__pycache__/profile.cpython-311.pyc +0 -0
- src/routes/__pycache__/settings.cpython-311.pyc +0 -0
- src/routes/auth.py +4 -2
- src/routes/contacts.py +6 -2
- src/services/__pycache__/backend_client.cpython-311.pyc +0 -0
- src/services/__pycache__/s3_backup.cpython-311.pyc +0 -0
- src/services/__pycache__/s3_config.cpython-311.pyc +0 -0
- src/services/__pycache__/s3_restore.cpython-311.pyc +0 -0
- src/services/__pycache__/storage_service.cpython-311.pyc +0 -0
- src/services/backend_client.py +31 -8
- src/services/s3_backup.py +434 -0
- src/services/s3_config.py +226 -0
- src/services/s3_restore.py +406 -0
- src/services/storage_service.py +71 -4
- src/utils/__pycache__/s3_logger.cpython-311.pyc +0 -0
- src/utils/s3_logger.py +206 -0
- tests/integration/test_s3_minio.py +214 -0
- tests/unit/test_s3_backup.py +241 -0
- tests/unit/test_s3_config.py +136 -0
- tests/unit/test_s3_restore.py +205 -0
entrypoint.sh
CHANGED
@@ -1,8 +1,29 @@
 #!/bin/bash
-# Webapp entrypoint script - runs database migrations before starting Gunicorn
+# Webapp entrypoint script - restores from S3 and runs database migrations before starting Gunicorn

 set -e

+echo "[ENTRYPOINT] Checking for S3 restore..."
+
+# Restore database from S3 if enabled (Feature 015)
+python3 <<'RESTORE_PYEOF'
+import os
+import sys
+
+# Add src directory to Python path
+sys.path.insert(0, '/app')
+
+try:
+    from src.services.s3_restore import restore_on_startup
+
+    result = restore_on_startup()
+    print(f"[S3_RESTORE] Result: {result.value}")
+except Exception as e:
+    print(f"[S3_RESTORE] Failed (non-fatal): {e}")
+    # Continue startup even if restore fails
+
+RESTORE_PYEOF
+
 echo "[ENTRYPOINT] Running database migrations..."

 # Run Python migration script

@@ -42,6 +63,35 @@ if not table_exists:
     cursor.execute("PRAGMA table_info(contact_sessions)")
     columns = [row[1] for row in cursor.fetchall()]

+    # Step 2a: Add contact_id column if missing (v2 API alignment)
+    if 'contact_id' not in columns:
+        print("[MIGRATION] Applying migration: Add contact_id column")
+
+        try:
+            cursor.execute("ALTER TABLE contact_sessions ADD COLUMN contact_id VARCHAR(255) NOT NULL DEFAULT ''")
+            print("  ✓ Added contact_id column")
+        except Exception as e:
+            print(f"  ⚠ contact_id: {e}")
+
+        try:
+            cursor.execute("UPDATE contact_sessions SET contact_id = session_id WHERE contact_id = ''")
+            print(f"  ✓ Backfilled {cursor.rowcount} existing contact_id values")
+        except Exception as e:
+            print(f"  ⚠ backfill contact_id: {e}")
+
+        try:
+            cursor.execute("CREATE INDEX IF NOT EXISTS idx_contact_sessions_contact_id ON contact_sessions(contact_id)")
+            print("  ✓ Created index idx_contact_sessions_contact_id")
+        except Exception as e:
+            print(f"  ⚠ index: {e}")
+
+        conn.commit()
+        print("[MIGRATION] contact_id migration complete")
+
+        # Refresh columns list after contact_id migration
+        cursor.execute("PRAGMA table_info(contact_sessions)")
+        columns = [row[1] for row in cursor.fetchall()]
+
     if 'normalized_name' not in columns:
         print("[MIGRATION] Applying migration: Add producer-related columns")

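The heredoc runs the restore before the migration step, so the migration script sees the freshly restored schema. A minimal sketch of exercising the same path outside the container, assuming the S3_* variable names defined in src/services/s3_config.py; the bucket name, credentials, and MinIO endpoint below are illustrative only:

# Sketch: exercise the startup restore path by hand.
# Assumes the S3_* variable names from src/services/s3_config.py;
# bucket, credentials, and endpoint are hypothetical values.
import os
import sys

os.environ.update({
    "S3_BACKUP_ENABLED": "true",
    "S3_BUCKET_NAME": "contacts-backup",        # hypothetical bucket
    "S3_ACCESS_KEY": "minioadmin",              # hypothetical credentials
    "S3_SECRET_KEY": "minioadmin",
    "S3_ENDPOINT_URL": "http://localhost:9000", # hypothetical MinIO endpoint
})
sys.path.insert(0, "/app")

from src.services.s3_restore import restore_on_startup

result = restore_on_startup()
print(f"[S3_RESTORE] Result: {result.value}")  # e.g. "no_backup_found" on first boot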
migrations/003_add_contact_id.sql
ADDED
@@ -0,0 +1,12 @@
+-- Migration: Add contact_id to contact_sessions table
+-- This aligns with v2 API where contact_id maps to project_id in backend
+
+-- Add contact_id column (default to session_id for existing records)
+ALTER TABLE contact_sessions ADD COLUMN contact_id VARCHAR(255) NOT NULL DEFAULT '';
+
+-- Update existing records: set contact_id from session_id
+-- Format is typically user_id_uuid, we'll use the full session_id as contact_id
+UPDATE contact_sessions SET contact_id = session_id WHERE contact_id = '';
+
+-- Create index for contact_id lookups
+CREATE INDEX IF NOT EXISTS idx_contact_sessions_contact_id ON contact_sessions(contact_id);
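The entrypoint applies these same three statements guarded by a column check, because SQLite's ALTER TABLE ADD COLUMN errors out if the column already exists. A sketch of applying the .sql file by hand with the same idempotence guard; the local database path is an assumption:

# Sketch: apply 003_add_contact_id.sql manually against a local SQLite file.
# "data/contacts.db" is a hypothetical path; the PRAGMA guard mirrors the
# entrypoint, since re-adding an existing column raises an OperationalError.
import sqlite3

conn = sqlite3.connect("data/contacts.db")
cursor = conn.cursor()

cursor.execute("PRAGMA table_info(contact_sessions)")
columns = [row[1] for row in cursor.fetchall()]

if 'contact_id' not in columns:
    with open("migrations/003_add_contact_id.sql") as f:
        conn.executescript(f.read())  # runs all three statements in order
    conn.commit()

conn.close()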
requirements.txt
CHANGED
@@ -18,6 +18,9 @@ opentelemetry-instrumentation-requests==0.42b0
 # Database
 SQLAlchemy==2.0.23

+# S3 backup/restore
+boto3>=1.34.0
+
 # Configuration
 python-dotenv==1.0.0

src/__pycache__/app.cpython-311.pyc
CHANGED
Binary files a/src/__pycache__/app.cpython-311.pyc and b/src/__pycache__/app.cpython-311.pyc differ
src/models/__init__.py
CHANGED
@@ -26,6 +26,7 @@ class ContactSession:

     session_id: str  # Format: {user_id}_{UUID_v4}
     user_id: str
+    contact_id: str  # Contact identifier (maps to project_id in v2 backend)
     contact_name: str
     contact_description: Optional[str]
     is_reference: bool
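Callers constructing ContactSession must now pass contact_id explicitly. A sketch mirroring only the fields visible in this hunk (the real dataclass has further attributes, so this is illustrative rather than a drop-in construction):

# Sketch: shape of the dataclass after this change, fields beyond
# is_reference omitted. The user id and contact values are hypothetical.
from dataclasses import dataclass
from typing import Optional
import uuid

@dataclass
class ContactSession:
    session_id: str   # Format: {user_id}_{UUID_v4}
    user_id: str
    contact_id: str   # Contact identifier (maps to project_id in v2 backend)
    contact_name: str
    contact_description: Optional[str]
    is_reference: bool

user_id = "u123"
session = ContactSession(
    session_id=f"{user_id}_{uuid.uuid4()}",
    user_id=user_id,
    contact_id="alice",   # new in v2
    contact_name="Alice",
    contact_description=None,
    is_reference=False,
)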
src/models/__pycache__/__init__.cpython-311.pyc
CHANGED
Binary files a/src/models/__pycache__/__init__.cpython-311.pyc and b/src/models/__pycache__/__init__.cpython-311.pyc differ
src/routes/__pycache__/auth.cpython-311.pyc
CHANGED
Binary files a/src/routes/__pycache__/auth.cpython-311.pyc and b/src/routes/__pycache__/auth.cpython-311.pyc differ
src/routes/__pycache__/contacts.cpython-311.pyc
CHANGED
Binary files a/src/routes/__pycache__/contacts.cpython-311.pyc and b/src/routes/__pycache__/contacts.cpython-311.pyc differ
src/routes/__pycache__/profile.cpython-311.pyc
CHANGED
Binary files a/src/routes/__pycache__/profile.cpython-311.pyc and b/src/routes/__pycache__/profile.cpython-311.pyc differ
src/routes/__pycache__/settings.cpython-311.pyc
CHANGED
Binary files a/src/routes/__pycache__/settings.cpython-311.pyc and b/src/routes/__pycache__/settings.cpython-311.pyc differ
src/routes/auth.py
CHANGED
@@ -108,8 +108,10 @@ def callback():
         # New user - create backend session first, then store with returned session_id
         try:
             backend_response = backend_client.create_session(
-                …
-                …
+                user_id=user_id,
+                contact_id="user-profile",
+                description=f"{display_name}'s Profile",
+                is_reference=False
             )
             backend_session_id = backend_response.get("session_id")

src/routes/contacts.py
CHANGED
@@ -137,9 +137,13 @@ def create_contact():

         try:
             # Phase 1: Create backend session in PostgreSQL
-            title = f"Contact: {contact_name}"
             logger.info(f"[CONTACT_CREATE] Phase 1: Creating backend session for user {user_id}, contact '{contact_name}'")
-            backend_response = backend_api.create_session(…)
+            backend_response = backend_api.create_session(
+                user_id=user_id,
+                contact_id=contact_name,  # Use contact name as contact_id initially
+                description=f"Contact: {contact_name}",
+                is_reference=False
+            )
             session_id = backend_response.get("session_id")

             if not session_id:
src/services/__pycache__/backend_client.cpython-311.pyc
CHANGED
Binary files a/src/services/__pycache__/backend_client.cpython-311.pyc and b/src/services/__pycache__/backend_client.cpython-311.pyc differ
src/services/__pycache__/s3_backup.cpython-311.pyc
ADDED
Binary file (22.4 kB)
src/services/__pycache__/s3_config.cpython-311.pyc
ADDED
Binary file (9.99 kB)
src/services/__pycache__/s3_restore.cpython-311.pyc
ADDED
Binary file (20.2 kB)
src/services/__pycache__/storage_service.cpython-311.pyc
CHANGED
Binary files a/src/services/__pycache__/storage_service.cpython-311.pyc and b/src/services/__pycache__/storage_service.cpython-311.pyc differ
src/services/backend_client.py
CHANGED
@@ -212,23 +212,35 @@ class BackendAPIClient:
         finally:
             self._track_latency(start_time)

-    def create_session(…):
+    def create_session(
+        self,
+        user_id: str,
+        contact_id: str,
+        description: str = "",
+        is_reference: bool = False
+    ) -> Dict[str, Any]:
         """
-        Create a new session (…)
+        Create a new session in the backend API (v2 format).

         Args:
-            …
-            …
+            user_id: User ID (required for API authentication)
+            contact_id: Contact identifier (maps to project_id in v2)
+            description: Session description (optional)
+            is_reference: Whether this is a reference session

         Returns:
-            …
+            Response dict with session_id, contact_id, description, org_id, project_id

         Raises:
             BackendAPIError: If API request fails
         """
         url = f"{self.base_url}/sessions"

-        payload = {…}
+        payload = {
+            "contact_id": contact_id,
+            "description": description,
+            "is_reference": is_reference
+        }

         # Create span for backend call with OpenTelemetry
         tracer = trace.get_tracer(__name__)

@@ -242,7 +254,9 @@ class BackendAPIClient:
             span.set_attribute("http.method", "POST")
             span.set_attribute("http.url", url)
             span.set_attribute("user_id", user_id)
-            span.set_attribute(…)
+            span.set_attribute("contact_id", contact_id)
+            span.set_attribute("description", description)
+            span.set_attribute("is_reference", is_reference)

             try:
                 response = requests.post(url, json=payload, headers=headers, timeout=self.timeout)

@@ -326,7 +340,16 @@ class BackendAPIClient:
             user_id: User ID to list sessions for

         Returns:
-            List of session dicts
+            List of session dicts with fields:
+            - id: Session ID
+            - contact_id: Contact identifier (project_id in v2)
+            - title: Session title (legacy, same as description)
+            - description: Session description
+            - user_id: User ID
+            - created_at: Unix timestamp
+            - last_interaction: Unix timestamp
+            - message_count: Number of messages
+            - is_reference: Reference flag

         Raises:
             BackendAPIError: If API request fails
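With the v2 signature, call sites pass contact_id, description, and is_reference instead of the old arguments (see the auth.py and contacts.py hunks above). A usage sketch, assuming BackendAPIClient and BackendAPIError are both importable from this module and that a configured client instance is already available:

# Sketch: calling the v2 create_session signature. Construction of the
# client is not part of this diff, so `client` is assumed given.
from src.services.backend_client import BackendAPIClient, BackendAPIError

def register_contact(client: BackendAPIClient, user_id: str, contact_name: str) -> str:
    try:
        response = client.create_session(
            user_id=user_id,
            contact_id=contact_name,             # maps to project_id in v2
            description=f"Contact: {contact_name}",
            is_reference=False,
        )
    except BackendAPIError as e:
        raise RuntimeError(f"backend session creation failed: {e}") from e
    return response["session_id"]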
src/services/s3_backup.py
ADDED
@@ -0,0 +1,434 @@
"""
S3 Backup Manager

Handles automatic backup of SQLite database to S3-compatible storage.
Provides debouncing to prevent excessive S3 uploads and background
threading for non-blocking backup operations.
"""

import os
import sqlite3
import hashlib
import threading
import time
import logging
from datetime import datetime
from typing import Optional
from dataclasses import dataclass

import boto3
from botocore.exceptions import ClientError

from ..utils import s3_logger
from .s3_config import (
    S3Config,
    S3BackupError,
    S3CredentialsError,
    S3BucketNotFoundError,
    S3ConnectionError,
    DatabaseCorruptedError
)


logger = logging.getLogger(__name__)


@dataclass
class BackupMetadata:
    """Metadata for a backup file in S3."""
    s3_key: str
    last_modified: datetime
    size_bytes: int
    checksum_sha256: Optional[str] = None


class BackupManager:
    """
    Manages automatic backup of SQLite database to S3.

    Features:
    - Non-blocking backup requests with debouncing
    - Background thread execution
    - Retry logic with exponential backoff
    - Checksum validation
    - Graceful error handling
    """

    def __init__(self, config: S3Config, db_path: str):
        """
        Initialize the backup manager.

        Args:
            config: S3 configuration object
            db_path: Absolute path to SQLite database file

        Raises:
            ValueError: If db_path does not exist or is not readable
        """
        if not os.path.exists(db_path):
            raise ValueError(f"Database file does not exist: {db_path}")

        if not os.access(db_path, os.R_OK):
            raise ValueError(f"Database file is not readable: {db_path}")

        self.config = config
        self.db_path = db_path
        self.last_backup_request = None
        self.backup_lock = threading.Lock()
        self._debounce_thread = None

        if config.enabled:
            self.s3_client = config.create_s3_client()
            logger.info(f"BackupManager initialized for {db_path}")
        else:
            self.s3_client = None
            logger.info("BackupManager initialized but S3 is disabled")

    def request_backup(self) -> None:
        """
        Non-blocking method to request a database backup.

        Uses debouncing to prevent excessive S3 uploads. Multiple requests
        within the debounce period are collapsed into a single backup.

        Side Effects:
            - Starts background thread if not already running
            - Updates last_backup_request timestamp
        """
        if not self.config.enabled:
            s3_logger.backup_skip_reason('s3_not_enabled', 'backup_requested')
            return

        with self.backup_lock:
            self.last_backup_request = time.time()

            # Start debounce thread if not already running
            if self._debounce_thread is None or not self._debounce_thread.is_alive():
                self._debounce_thread = threading.Thread(
                    target=self._debounced_backup,
                    daemon=True
                )
                self._debounce_thread.start()
                logger.debug("Started debounce thread for backup")

    def _debounced_backup(self) -> None:
        """
        Wait for debounce period, then execute backup.

        This runs in a background thread and waits for the debounce period
        to ensure no more backup requests arrive before executing.
        """
        debounce_seconds = self.config.debounce_seconds

        # Wait for debounce period
        time.sleep(debounce_seconds)

        # Check if another request came in during debounce
        with self.backup_lock:
            time_since_last_request = time.time() - self.last_backup_request
            if time_since_last_request < debounce_seconds:
                # Another request came in, let it handle the backup
                logger.debug("Backup skipped - newer request pending")
                return

        # Execute the backup
        self._execute_backup()

    def execute_backup_now(self) -> bool:
        """
        Synchronous method to execute backup immediately, bypassing debounce.

        Returns:
            True if backup succeeded, False if it failed

        Raises:
            S3CredentialsError: Invalid S3 credentials
            S3BucketNotFoundError: Bucket does not exist
            DatabaseCorruptedError: Source database failed integrity check
        """
        if not self.config.enabled:
            logger.warning("Backup requested but S3 is disabled")
            return False

        return self._execute_backup()

    def _execute_backup(self) -> bool:
        """
        Execute the actual backup operation.

        Process:
        1. Check database integrity
        2. Create hot backup using sqlite3.backup()
        3. Calculate checksum
        4. Upload to S3 with retries
        5. Clean up temp files

        Returns:
            True if backup succeeded, False otherwise
        """
        start_time = time.time()
        db_size = os.path.getsize(self.db_path)

        s3_logger.backup_started(self.db_path, db_size)
        logger.info(f"Starting backup of {self.db_path} ({db_size} bytes)")

        temp_path = None

        try:
            # Validate source database integrity
            if not self._validate_database(self.db_path):
                raise DatabaseCorruptedError("Source database failed integrity check")

            # Create backup using sqlite3.backup() API
            temp_path = f"{self.db_path}.backup"
            self._create_hot_backup(temp_path)

            # Calculate checksum
            checksum = self._calculate_checksum(temp_path)

            # Upload to S3 with timestamp
            timestamp = datetime.utcnow().strftime('%Y-%m-%d-%H-%M-%S')
            s3_key = f"contacts-{timestamp}.db"

            if not self._upload_to_s3(temp_path, s3_key, checksum):
                return False

            # Success
            duration = time.time() - start_time
            upload_size = os.path.getsize(temp_path)
            s3_logger.backup_completed(duration, s3_key, upload_size)
            logger.info(f"Backup completed successfully: {s3_key} ({duration:.2f}s)")

            return True

        except DatabaseCorruptedError as e:
            s3_logger.backup_failed(str(e))
            logger.error(f"Backup failed - database corrupted: {e}")
            raise

        except Exception as e:
            s3_logger.backup_failed(str(e))
            logger.error(f"Backup failed: {e}", exc_info=True)
            return False

        finally:
            # Clean up temp file
            if temp_path and os.path.exists(temp_path):
                try:
                    os.remove(temp_path)
                    logger.debug(f"Cleaned up temp file: {temp_path}")
                except OSError as e:
                    logger.warning(f"Failed to clean up temp file: {e}")

    def _create_hot_backup(self, dest_path: str) -> None:
        """
        Create a hot backup of the database using sqlite3.backup() API.

        This method is safe to use while the database is being written to,
        as sqlite3.backup() handles concurrent access properly.

        Args:
            dest_path: Path where backup should be created
        """
        logger.debug(f"Creating hot backup to {dest_path}")

        # Connect to source database
        source_conn = sqlite3.connect(self.db_path)

        try:
            # Create destination connection
            dest_conn = sqlite3.connect(dest_path)

            try:
                # Execute hot backup
                source_conn.backup(dest_conn)
                logger.debug("Hot backup completed")
            finally:
                dest_conn.close()
        finally:
            source_conn.close()

    def _calculate_checksum(self, file_path: str) -> str:
        """
        Calculate SHA-256 checksum of a file.

        Args:
            file_path: Path to file

        Returns:
            Hexadecimal SHA-256 checksum string
        """
        sha256_hash = hashlib.sha256()

        with open(file_path, 'rb') as f:
            # Read in chunks for memory efficiency
            for chunk in iter(lambda: f.read(8192), b''):
                sha256_hash.update(chunk)

        checksum = sha256_hash.hexdigest()
        logger.debug(f"Calculated checksum: {checksum}")
        return checksum

    def _upload_to_s3(
        self,
        file_path: str,
        s3_key: str,
        checksum: str,
        max_retries: int = 3
    ) -> bool:
        """
        Upload file to S3 with retry logic and exponential backoff.

        Args:
            file_path: Path to file to upload
            s3_key: S3 object key
            checksum: SHA-256 checksum to store in metadata
            max_retries: Maximum number of retry attempts

        Returns:
            True if upload succeeded, False otherwise
        """
        file_size = os.path.getsize(file_path)

        for attempt in range(max_retries):
            try:
                logger.debug(f"Uploading to S3: {s3_key} (attempt {attempt + 1}/{max_retries})")

                # Upload with metadata
                with open(file_path, 'rb') as f:
                    self.s3_client.upload_fileobj(
                        f,
                        self.config.bucket,
                        s3_key,
                        ExtraArgs={
                            'Metadata': {
                                'sha256': checksum,
                                'source_host': os.uname().nodename,
                                'db_version': str(sqlite3.sqlite_version)
                            }
                        }
                    )

                logger.info(f"Upload successful: {s3_key} ({file_size} bytes)")
                return True

            except ClientError as e:
                error_code = e.response['Error']['Code']

                # Permanent errors - don't retry
                if error_code in ['NoSuchBucket', 'AccessDenied', 'InvalidAccessKeyId']:
                    s3_logger.backup_failed(error_code, attempt + 1, max_retries)
                    logger.error(f"Permanent S3 error: {error_code}")

                    if error_code == 'NoSuchBucket':
                        raise S3BucketNotFoundError(f"Bucket not found: {self.config.bucket}")
                    elif error_code in ['AccessDenied', 'InvalidAccessKeyId']:
                        raise S3CredentialsError(f"Invalid credentials: {error_code}")

                    return False

                # Transient errors - retry with backoff
                if attempt < max_retries - 1:
                    backoff = 2 ** attempt  # 1s, 2s, 4s
                    logger.warning(
                        f"S3 upload failed (attempt {attempt + 1}/{max_retries}): {error_code}, "
                        f"retrying in {backoff}s"
                    )
                    time.sleep(backoff)
                else:
                    s3_logger.backup_failed(error_code, attempt + 1, max_retries)
                    logger.error(f"S3 upload failed after {max_retries} attempts: {error_code}")
                    return False

            except Exception as e:
                s3_logger.backup_failed(str(e), attempt + 1, max_retries)
                logger.error(f"Unexpected error during S3 upload: {e}", exc_info=True)
                return False

        return False

    def _validate_database(self, db_path: str) -> bool:
        """
        Validate SQLite database integrity.

        Args:
            db_path: Path to database file

        Returns:
            True if database passes integrity check, False otherwise
        """
        try:
            conn = sqlite3.connect(db_path)
            cursor = conn.cursor()
            cursor.execute("PRAGMA integrity_check")
            result = cursor.fetchone()[0]
            conn.close()

            if result == 'ok':
                logger.debug(f"Database integrity check passed: {db_path}")
                return True
            else:
                logger.error(f"Database integrity check failed: {result}")
                return False

        except Exception as e:
            logger.error(f"Database validation failed: {e}")
            return False

    def get_latest_backup(self) -> Optional[BackupMetadata]:
        """
        Query S3 for the latest backup file.

        Returns:
            BackupMetadata object with latest backup info, or None if no backups found

        Raises:
            S3ConnectionError: Network or S3 service error
        """
        if not self.config.enabled:
            return None

        try:
            logger.debug(f"Querying S3 for latest backup in bucket: {self.config.bucket}")

            response = self.s3_client.list_objects_v2(
                Bucket=self.config.bucket,
                Prefix='contacts-'
            )

            if 'Contents' not in response or len(response['Contents']) == 0:
                logger.info("No backups found in S3")
                return None

            # Find latest by LastModified
            latest = max(response['Contents'], key=lambda x: x['LastModified'])

            # Get metadata if available
            try:
                head_response = self.s3_client.head_object(
                    Bucket=self.config.bucket,
                    Key=latest['Key']
                )
                checksum = head_response.get('Metadata', {}).get('sha256')
            except Exception as e:
                logger.warning(f"Failed to get metadata for {latest['Key']}: {e}")
                checksum = None

            metadata = BackupMetadata(
                s3_key=latest['Key'],
                last_modified=latest['LastModified'],
                size_bytes=latest['Size'],
                checksum_sha256=checksum
            )

            logger.info(f"Latest backup: {metadata.s3_key} ({metadata.last_modified})")
            return metadata

        except ClientError as e:
            error_code = e.response['Error']['Code']
            logger.error(f"S3 error querying backups: {error_code}")
            raise S3ConnectionError(f"S3 error: {error_code}") from e

        except Exception as e:
            logger.error(f"Unexpected error querying backups: {e}")
            raise S3ConnectionError(f"Error querying backups: {e}") from e
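The intended call pattern is request_backup() after each write, with the debounce collapsing bursts into one upload, while execute_backup_now() bypasses the debounce for shutdown hooks or manual triggers. A minimal sketch, assuming the S3_* env vars are set and the database file already exists (the constructor raises ValueError otherwise):

# Sketch: wiring BackupManager into a write path. The db path matches the
# DATABASE_PATH default used elsewhere in this commit.
from src.services.s3_config import S3Config
from src.services.s3_backup import BackupManager

config = S3Config.from_env()
manager = BackupManager(config, "/app/data/contacts.db")

def after_write():
    # Cheap and non-blocking; many calls within debounce_seconds (default
    # 300) collapse into a single background upload.
    manager.request_backup()

def on_shutdown():
    # Synchronous, skips the debounce; returns False on failure.
    if not manager.execute_backup_now():
        print("final backup failed; latest S3 copy may be stale")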
src/services/s3_config.py
ADDED
@@ -0,0 +1,226 @@
"""
S3 Configuration Module

Manages S3 backup/restore configuration loaded from environment variables.
Provides validation and credential checking for S3-compatible storage
(AWS S3, MinIO, DigitalOcean Spaces, etc.).
"""

import os
import logging
from dataclasses import dataclass
from typing import Optional

import boto3
from botocore.exceptions import ClientError


logger = logging.getLogger(__name__)


# Custom Exceptions
class S3BackupError(Exception):
    """Base exception for S3 backup operations"""
    pass


class S3CredentialsError(S3BackupError):
    """Invalid S3 credentials"""
    pass


class S3BucketNotFoundError(S3BackupError):
    """S3 bucket does not exist"""
    pass


class DatabaseCorruptedError(S3BackupError):
    """SQLite database failed integrity check"""
    pass


class S3ConnectionError(S3BackupError):
    """Network error connecting to S3"""
    pass


class RestoreError(S3BackupError):
    """Critical error during restore that prevents startup"""
    pass


@dataclass
class S3Config:
    """
    S3 configuration for backup/restore operations.

    Attributes:
        enabled: Whether S3 backup/restore is enabled
        bucket: S3 bucket name
        access_key: AWS access key ID
        secret_key: AWS secret access key
        region: AWS region (default: us-east-1)
        endpoint_url: Custom S3 endpoint for MinIO/DigitalOcean Spaces
        upload_timeout: Upload timeout in seconds
        download_timeout: Download timeout in seconds
        debounce_seconds: Debounce period for backup requests
    """
    enabled: bool
    bucket: Optional[str] = None
    access_key: Optional[str] = None
    secret_key: Optional[str] = None
    region: str = "us-east-1"
    endpoint_url: Optional[str] = None
    upload_timeout: int = 60
    download_timeout: int = 30
    debounce_seconds: int = 300

    @staticmethod
    def from_env() -> 'S3Config':
        """
        Factory method to create S3Config from environment variables.

        Environment Variables:
            S3_BACKUP_ENABLED: "true" or "false" (default: "false")
            S3_BUCKET_NAME: S3 bucket name (required if enabled)
            S3_ACCESS_KEY: AWS access key ID (required if enabled)
            S3_SECRET_KEY: AWS secret access key (required if enabled)
            S3_REGION: AWS region (default: "us-east-1")
            S3_ENDPOINT_URL: Custom S3 endpoint (optional)
            S3_UPLOAD_TIMEOUT: Upload timeout in seconds (default: 60)
            S3_DOWNLOAD_TIMEOUT: Download timeout in seconds (default: 30)
            S3_DEBOUNCE_SECONDS: Debounce period in seconds (default: 300)

        Returns:
            S3Config instance with enabled=False if configuration is incomplete
        """
        enabled = os.getenv('S3_BACKUP_ENABLED', 'false').lower() == 'true'

        if not enabled:
            logger.info("S3 backup/restore disabled (S3_BACKUP_ENABLED not set)")
            return S3Config(enabled=False)

        # Load required configuration
        bucket = os.getenv('S3_BUCKET_NAME')
        access_key = os.getenv('S3_ACCESS_KEY')
        secret_key = os.getenv('S3_SECRET_KEY')

        # Validate required fields
        missing = []
        if not bucket:
            missing.append('S3_BUCKET_NAME')
        if not access_key:
            missing.append('S3_ACCESS_KEY')
        if not secret_key:
            missing.append('S3_SECRET_KEY')

        if missing:
            logger.warning(
                f"S3 backup disabled - missing required configuration: {', '.join(missing)}"
            )
            return S3Config(enabled=False)

        # Load optional configuration with defaults
        region = os.getenv('S3_REGION', 'us-east-1')
        endpoint_url = os.getenv('S3_ENDPOINT_URL')  # None for AWS S3

        try:
            upload_timeout = int(os.getenv('S3_UPLOAD_TIMEOUT', '60'))
            download_timeout = int(os.getenv('S3_DOWNLOAD_TIMEOUT', '30'))
            debounce_seconds = int(os.getenv('S3_DEBOUNCE_SECONDS', '300'))
        except ValueError as e:
            logger.warning(f"Invalid timeout configuration: {e}, using defaults")
            upload_timeout = 60
            download_timeout = 30
            debounce_seconds = 300

        config = S3Config(
            enabled=True,
            bucket=bucket,
            access_key=access_key,
            secret_key=secret_key,
            region=region,
            endpoint_url=endpoint_url,
            upload_timeout=upload_timeout,
            download_timeout=download_timeout,
            debounce_seconds=debounce_seconds
        )

        logger.info(
            f"S3 backup enabled - bucket: {bucket}, region: {region}, "
            f"endpoint: {endpoint_url or 'AWS S3'}"
        )

        return config

    def validate_credentials(self) -> bool:
        """
        Test S3 credentials by performing a HeadBucket operation.

        Returns:
            True if credentials are valid and bucket is accessible
            False if credentials are invalid or bucket not found

        Raises:
            S3ConnectionError: Network or S3 service error
        """
        if not self.enabled:
            return False

        try:
            s3_client = boto3.client(
                's3',
                endpoint_url=self.endpoint_url,
                aws_access_key_id=self.access_key,
                aws_secret_access_key=self.secret_key,
                region_name=self.region
            )

            # HeadBucket validates both credentials and bucket existence
            s3_client.head_bucket(Bucket=self.bucket)
            logger.info(f"S3 credentials validated - bucket '{self.bucket}' is accessible")
            return True

        except ClientError as e:
            error_code = e.response['Error']['Code']

            if error_code == '404':
                logger.error(f"S3 bucket not found: {self.bucket}")
                return False
            elif error_code == '403':
                logger.error("S3 credentials invalid or insufficient permissions")
                return False
            else:
                logger.error(f"S3 error during credential validation: {error_code}")
                raise S3ConnectionError(f"S3 error: {error_code}") from e

        except Exception as e:
            logger.error(f"Unexpected error during S3 credential validation: {e}")
            raise S3ConnectionError(f"S3 connection error: {e}") from e

    def create_s3_client(self):
        """
        Create a boto3 S3 client with this configuration.

        Returns:
            boto3.client instance configured for S3
        """
        if not self.enabled:
            raise S3CredentialsError("S3 backup is not enabled")

        from botocore.config import Config

        boto_config = Config(
            connect_timeout=5,
            read_timeout=self.download_timeout,
            retries={'max_attempts': 0}  # We handle retries manually
        )

        return boto3.client(
            's3',
            endpoint_url=self.endpoint_url,
            aws_access_key_id=self.access_key,
            aws_secret_access_key=self.secret_key,
            region_name=self.region,
            config=boto_config
        )
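Note that from_env() degrades to enabled=False instead of raising when configuration is incomplete, so callers can construct the managers unconditionally. A sketch of pointing the config at a local MinIO; only the variable names come from from_env() above, the endpoint and credential values are illustrative:

# Sketch: configuring for a local MinIO instance and checking reachability.
import os
from src.services.s3_config import S3Config

os.environ["S3_BACKUP_ENABLED"] = "true"
os.environ["S3_BUCKET_NAME"] = "contacts-backup"         # hypothetical
os.environ["S3_ACCESS_KEY"] = "minioadmin"               # hypothetical
os.environ["S3_SECRET_KEY"] = "minioadmin"               # hypothetical
os.environ["S3_ENDPOINT_URL"] = "http://localhost:9000"  # omit for AWS S3

config = S3Config.from_env()
if config.enabled and config.validate_credentials():
    client = config.create_s3_client()  # boto3 client with manual-retry config
else:
    print("S3 disabled or unreachable; running with local storage only")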
src/services/s3_restore.py
ADDED
|
@@ -0,0 +1,406 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
S3 Restore Manager
|
| 3 |
+
|
| 4 |
+
Handles automatic restore of SQLite database from S3-compatible storage
|
| 5 |
+
during webapp startup. Provides validation and fallback logic to ensure
|
| 6 |
+
reliable database restoration.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import os
|
| 10 |
+
import sqlite3
|
| 11 |
+
import hashlib
|
| 12 |
+
import logging
|
| 13 |
+
from datetime import datetime
|
| 14 |
+
from typing import Optional
|
| 15 |
+
from enum import Enum
|
| 16 |
+
|
| 17 |
+
import boto3
|
| 18 |
+
from botocore.exceptions import ClientError
|
| 19 |
+
|
| 20 |
+
from ..utils import s3_logger
|
| 21 |
+
from .s3_config import (
|
| 22 |
+
S3Config,
|
| 23 |
+
S3ConnectionError,
|
| 24 |
+
RestoreError,
|
| 25 |
+
DatabaseCorruptedError
|
| 26 |
+
)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
logger = logging.getLogger(__name__)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class RestoreResult(Enum):
|
| 33 |
+
"""Result of restore operation."""
|
| 34 |
+
RESTORED_FROM_S3 = "restored_from_s3"
|
| 35 |
+
LOCAL_NEWER = "local_newer"
|
| 36 |
+
NO_BACKUP_FOUND = "no_backup_found"
|
| 37 |
+
VALIDATION_FAILED = "validation_failed"
|
| 38 |
+
NETWORK_ERROR = "network_error"
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
class RestoreManager:
|
| 42 |
+
"""
|
| 43 |
+
Manages automatic restore of SQLite database from S3.
|
| 44 |
+
|
| 45 |
+
Features:
|
| 46 |
+
- Startup restore with validation
|
| 47 |
+
- Timestamp comparison (local vs S3)
|
| 48 |
+
- Checksum verification
|
| 49 |
+
- SQLite integrity check
|
| 50 |
+
- Atomic file replacement
|
| 51 |
+
- Fallback chain for reliability
|
| 52 |
+
"""
|
| 53 |
+
|
| 54 |
+
def __init__(self, config: S3Config, db_path: str):
|
| 55 |
+
"""
|
| 56 |
+
Initialize the restore manager.
|
| 57 |
+
|
| 58 |
+
Args:
|
| 59 |
+
config: S3 configuration object
|
| 60 |
+
db_path: Absolute path where database should be restored
|
| 61 |
+
"""
|
| 62 |
+
self.config = config
|
| 63 |
+
self.db_path = db_path
|
| 64 |
+
|
| 65 |
+
if config.enabled:
|
| 66 |
+
self.s3_client = config.create_s3_client()
|
| 67 |
+
logger.info(f"RestoreManager initialized for {db_path}")
|
| 68 |
+
else:
|
| 69 |
+
self.s3_client = None
|
| 70 |
+
logger.info("RestoreManager initialized but S3 is disabled")
|
| 71 |
+
|
| 72 |
+
def restore_from_s3(self) -> RestoreResult:
|
| 73 |
+
"""
|
| 74 |
+
Restore database from S3 with validation and fallback logic.
|
| 75 |
+
|
| 76 |
+
Process:
|
| 77 |
+
1. List backups in S3
|
| 78 |
+
2. Find latest by LastModified
|
| 79 |
+
3. Compare timestamps (local vs S3)
|
| 80 |
+
4. Download if S3 is newer
|
| 81 |
+
5. Validate checksum and integrity
|
| 82 |
+
6. Atomic replace local file
|
| 83 |
+
|
| 84 |
+
Returns:
|
| 85 |
+
RestoreResult enum indicating the outcome
|
| 86 |
+
|
| 87 |
+
Side Effects:
|
| 88 |
+
- May replace local database file atomically
|
| 89 |
+
- Creates temp files during download (cleaned up automatically)
|
| 90 |
+
- Logs all operations with structured logging
|
| 91 |
+
"""
|
| 92 |
+
start_time = datetime.now()
|
| 93 |
+
s3_logger.restore_started()
|
| 94 |
+
|
| 95 |
+
if not self.config.enabled:
|
| 96 |
+
duration = (datetime.now() - start_time).total_seconds()
|
| 97 |
+
s3_logger.restore_completed(duration, None, None, RestoreResult.NO_BACKUP_FOUND.value)
|
| 98 |
+
logger.info("Restore skipped - S3 disabled")
|
| 99 |
+
return RestoreResult.NO_BACKUP_FOUND
|
| 100 |
+
|
| 101 |
+
try:
|
| 102 |
+
# List backups from S3
|
| 103 |
+
backups = self._list_backups()
|
| 104 |
+
|
| 105 |
+
if not backups or len(backups) == 0:
|
| 106 |
+
duration = (datetime.now() - start_time).total_seconds()
|
| 107 |
+
s3_logger.restore_completed(duration, None, None, RestoreResult.NO_BACKUP_FOUND.value)
|
| 108 |
+
logger.info("No backups found in S3")
|
| 109 |
+
return RestoreResult.NO_BACKUP_FOUND
|
| 110 |
+
|
| 111 |
+
# Find latest backup
|
| 112 |
+
latest_backup = max(backups, key=lambda x: x['LastModified'])
|
| 113 |
+
s3_key = latest_backup['Key']
|
| 114 |
+
s3_timestamp = latest_backup['LastModified']
|
| 115 |
+
|
| 116 |
+
logger.info(f"Latest S3 backup: {s3_key} ({s3_timestamp})")
|
| 117 |
+
|
| 118 |
+
# Compare with local file timestamp
|
| 119 |
+
if os.path.exists(self.db_path):
|
| 120 |
+
local_mtime = datetime.fromtimestamp(os.path.getmtime(self.db_path))
|
| 121 |
+
local_mtime = local_mtime.replace(tzinfo=s3_timestamp.tzinfo) # Make timezone-aware
|
| 122 |
+
|
| 123 |
+
if local_mtime >= s3_timestamp:
|
| 124 |
+
duration = (datetime.now() - start_time).total_seconds()
|
| 125 |
+
s3_logger.restore_completed(duration, s3_key, None, RestoreResult.LOCAL_NEWER.value)
|
| 126 |
+
logger.info(f"Local database is newer ({local_mtime} >= {s3_timestamp}), skipping restore")
|
| 127 |
+
return RestoreResult.LOCAL_NEWER
|
| 128 |
+
|
| 129 |
+
# Download from S3
|
| 130 |
+
temp_path = f"{self.db_path}.restore"
|
| 131 |
+
download_size = self._download_backup(s3_key, temp_path)
|
| 132 |
+
|
| 133 |
+
# Validate backup
|
| 134 |
+
if not self.validate_backup(temp_path, s3_key):
|
| 135 |
+
# Validation failed - use local fallback
|
| 136 |
+
if os.path.exists(temp_path):
|
| 137 |
+
os.remove(temp_path)
|
| 138 |
+
|
| 139 |
+
duration = (datetime.now() - start_time).total_seconds()
|
| 140 |
+
s3_logger.restore_fallback("validation_failed", "using_local_database")
|
| 141 |
+
s3_logger.restore_completed(duration, s3_key, None, RestoreResult.VALIDATION_FAILED.value)
|
| 142 |
+
logger.warning("Backup validation failed, using local database")
|
| 143 |
+
return RestoreResult.VALIDATION_FAILED
|
| 144 |
+
|
| 145 |
+
# Atomic replace
|
| 146 |
+
self._atomic_replace(temp_path)
|
| 147 |
+
|
| 148 |
+
duration = (datetime.now() - start_time).total_seconds()
|
| 149 |
+
s3_logger.restore_completed(duration, s3_key, download_size, RestoreResult.RESTORED_FROM_S3.value)
|
| 150 |
+
logger.info(f"Restore completed successfully from {s3_key} ({duration:.2f}s)")
|
| 151 |
+
|
| 152 |
+
return RestoreResult.RESTORED_FROM_S3
|
| 153 |
+
|
| 154 |
+
except S3ConnectionError as e:
|
| 155 |
+
duration = (datetime.now() - start_time).total_seconds()
|
| 156 |
+
s3_logger.restore_fallback("network_error", "using_local_database")
|
| 157 |
+
s3_logger.restore_completed(duration, None, None, RestoreResult.NETWORK_ERROR.value)
|
| 158 |
+
logger.error(f"Network error during restore: {e}")
|
| 159 |
+
return RestoreResult.NETWORK_ERROR
|
| 160 |
+
|
| 161 |
+
except Exception as e:
|
| 162 |
+
duration = (datetime.now() - start_time).total_seconds()
|
| 163 |
+
s3_logger.restore_fallback(str(e), "using_local_database")
|
| 164 |
+
s3_logger.restore_completed(duration, None, None, RestoreResult.NETWORK_ERROR.value)
|
| 165 |
+
logger.error(f"Unexpected error during restore: {e}", exc_info=True)
|
| 166 |
+
return RestoreResult.NETWORK_ERROR
|
| 167 |
+
|
| 168 |
+
def _list_backups(self) -> list:
|
| 169 |
+
"""
|
| 170 |
+
List all backup files in S3.
|
| 171 |
+
|
| 172 |
+
Returns:
|
| 173 |
+
List of S3 object dictionaries with Key, LastModified, Size
|
| 174 |
+
|
| 175 |
+
Raises:
|
| 176 |
+
S3ConnectionError: Network or S3 service error
|
| 177 |
+
"""
|
| 178 |
+
try:
|
| 179 |
+
logger.debug(f"Listing backups in bucket: {self.config.bucket}")
|
| 180 |
+
|
| 181 |
+
response = self.s3_client.list_objects_v2(
|
| 182 |
+
Bucket=self.config.bucket,
|
| 183 |
+
Prefix='contacts-'
|
| 184 |
+
)
|
| 185 |
+
|
| 186 |
+
if 'Contents' not in response:
|
| 187 |
+
return []
|
| 188 |
+
|
| 189 |
+
return response['Contents']
|
| 190 |
+
|
| 191 |
+
except ClientError as e:
|
| 192 |
+
error_code = e.response['Error']['Code']
|
| 193 |
+
logger.error(f"S3 error listing backups: {error_code}")
|
| 194 |
+
raise S3ConnectionError(f"S3 error: {error_code}") from e
|
| 195 |
+
|
| 196 |
+
except Exception as e:
|
| 197 |
+
logger.error(f"Unexpected error listing backups: {e}")
|
| 198 |
+
raise S3ConnectionError(f"Error listing backups: {e}") from e
|
| 199 |
+
|
| 200 |
+
def _compare_timestamps(self, local_path: str, s3_timestamp: datetime) -> bool:
|
| 201 |
+
"""
|
| 202 |
+
Compare local file timestamp with S3 backup timestamp.
|
| 203 |
+
|
| 204 |
+
Args:
|
| 205 |
+
local_path: Path to local database file
|
| 206 |
+
s3_timestamp: LastModified timestamp from S3
|
| 207 |
+
|
| 208 |
+
Returns:
|
| 209 |
+
True if S3 backup is newer, False if local is newer or equal
|
| 210 |
+
"""
|
| 211 |
+
if not os.path.exists(local_path):
|
| 212 |
+
return True # No local file, S3 is "newer"
|
| 213 |
+
|
| 214 |
+
local_mtime = datetime.fromtimestamp(os.path.getmtime(local_path))
|
| 215 |
+
local_mtime = local_mtime.replace(tzinfo=s3_timestamp.tzinfo)
|
| 216 |
+
|
| 217 |
+
logger.debug(f"Timestamp comparison - Local: {local_mtime}, S3: {s3_timestamp}")
|
| 218 |
+
return s3_timestamp > local_mtime
|
| 219 |
+
|
| 220 |
+
def _download_backup(self, s3_key: str, dest_path: str) -> int:
|
| 221 |
+
"""
|
| 222 |
+
Download backup file from S3.
|
| 223 |
+
|
| 224 |
+
Args:
|
| 225 |
+
s3_key: S3 object key
|
| 226 |
+
dest_path: Local path where file should be saved
|
| 227 |
+
|
| 228 |
+
Returns:
|
| 229 |
+
Size of downloaded file in bytes
|
| 230 |
+
|
| 231 |
+
Raises:
|
| 232 |
+
S3ConnectionError: Network or S3 service error
|
| 233 |
+
"""
|
| 234 |
+
try:
|
| 235 |
+
logger.info(f"Downloading backup from S3: {s3_key}")
|
| 236 |
+
|
| 237 |
+
self.s3_client.download_file(
|
| 238 |
+
self.config.bucket,
|
| 239 |
+
s3_key,
|
| 240 |
+
dest_path
|
| 241 |
+
)
|
| 242 |
+
|
| 243 |
+
file_size = os.path.getsize(dest_path)
|
| 244 |
+
logger.info(f"Download completed: {file_size} bytes")
|
| 245 |
+
return file_size
|
| 246 |
+
|
| 247 |
+
except ClientError as e:
|
| 248 |
+
error_code = e.response['Error']['Code']
|
| 249 |
+
logger.error(f"S3 error downloading backup: {error_code}")
|
| 250 |
+
raise S3ConnectionError(f"S3 error: {error_code}") from e
|
| 251 |
+
|
| 252 |
+
except Exception as e:
|
| 253 |
+
logger.error(f"Unexpected error downloading backup: {e}")
|
| 254 |
+
raise S3ConnectionError(f"Error downloading backup: {e}") from e
|
| 255 |
+
|
| 256 |
+
def _validate_checksum(self, file_path: str, s3_key: str) -> bool:
|
| 257 |
+
"""
|
| 258 |
+
Validate file checksum against S3 metadata.
|
| 259 |
+
|
| 260 |
+
Args:
|
| 261 |
+
file_path: Path to downloaded file
|
| 262 |
+
s3_key: S3 object key
|
| 263 |
+
|
| 264 |
+
Returns:
|
| 265 |
+
True if checksum matches or no checksum in metadata, False if mismatch
|
| 266 |
+
"""
|
| 267 |
+
try:
|
| 268 |
+
# Get S3 object metadata
|
| 269 |
+
response = self.s3_client.head_object(
|
| 270 |
+
Bucket=self.config.bucket,
|
| 271 |
+
Key=s3_key
|
| 272 |
+
)
|
| 273 |
+
|
| 274 |
+
remote_checksum = response.get('Metadata', {}).get('sha256')
|
| 275 |
+
|
| 276 |
+
if not remote_checksum:
|
| 277 |
+
logger.debug("No checksum in S3 metadata, skipping validation")
|
| 278 |
+
return True
|
| 279 |
+
|
| 280 |
+
# Calculate local checksum
|
| 281 |
+
sha256_hash = hashlib.sha256()
|
| 282 |
+
with open(file_path, 'rb') as f:
|
| 283 |
+
for chunk in iter(lambda: f.read(8192), b''):
|
| 284 |
+
sha256_hash.update(chunk)
|
| 285 |
+
|
| 286 |
+
local_checksum = sha256_hash.hexdigest()
|
| 287 |
+
|
| 288 |
+
if local_checksum == remote_checksum:
|
| 289 |
+
logger.debug("Checksum validation passed")
|
| 290 |
+
return True
|
| 291 |
+
else:
|
| 292 |
+
logger.error(f"Checksum mismatch - Local: {local_checksum}, Remote: {remote_checksum}")
|
| 293 |
+
return False
|
| 294 |
+
|
| 295 |
+
except Exception as e:
|
| 296 |
+
logger.warning(f"Checksum validation failed: {e}")
|
| 297 |
+
return False
|
| 298 |
+
|
| 299 |
+
def _validate_sqlite_integrity(self, file_path: str) -> bool:
|
| 300 |
+
"""
|
| 301 |
+
Validate SQLite database integrity using PRAGMA integrity_check.
|
| 302 |
+
|
| 303 |
+
Args:
|
| 304 |
+
file_path: Path to database file
|
| 305 |
+
|
| 306 |
+
Returns:
|
| 307 |
+
True if database passes integrity check, False otherwise
|
| 308 |
+
"""
|
| 309 |
+
try:
|
| 310 |
+
conn = sqlite3.connect(file_path)
|
| 311 |
+
cursor = conn.cursor()
|
| 312 |
+
cursor.execute("PRAGMA integrity_check")
|
| 313 |
+
result = cursor.fetchone()[0]
|
| 314 |
+
conn.close()
|
| 315 |
+
|
| 316 |
+
if result == 'ok':
|
| 317 |
+
logger.debug("SQLite integrity check passed")
|
| 318 |
+
return True
|
| 319 |
+
else:
|
| 320 |
+
logger.error(f"SQLite integrity check failed: {result}")
|
| 321 |
+
return False
|
| 322 |
+
|
| 323 |
+
except Exception as e:
|
| 324 |
+
logger.error(f"SQLite integrity check failed: {e}")
|
| 325 |
+
return False
|
| 326 |
+
|
| 327 |
+
def validate_backup(self, file_path: str, s3_key: Optional[str] = None) -> bool:
|
| 328 |
+
"""
|
| 329 |
+
Validate a backup file's integrity.
|
| 330 |
+
|
| 331 |
+
Performs both checksum validation (if S3 metadata available) and
|
| 332 |
+
SQLite integrity check.
|
| 333 |
+
|
| 334 |
+
Args:
|
| 335 |
+
file_path: Path to backup file to validate
|
| 336 |
+
s3_key: Optional S3 key for checksum validation
|
| 337 |
+
|
| 338 |
+
Returns:
|
| 339 |
+
True if file is valid, False otherwise
|
| 340 |
+
"""
|
| 341 |
+
logger.info(f"Validating backup: {file_path}")
|
| 342 |
+
|
| 343 |
+
# Validate checksum if S3 key provided
|
| 344 |
+
if s3_key and not self._validate_checksum(file_path, s3_key):
|
| 345 |
+
return False
|
| 346 |
+
|
| 347 |
+
# Validate SQLite integrity
|
| 348 |
+
if not self._validate_sqlite_integrity(file_path):
|
| 349 |
+
return False
|
| 350 |
+
|
| 351 |
+
logger.info("Backup validation passed")
|
| 352 |
+
return True
|
| 353 |
+
|
| 354 |
+
def _atomic_replace(self, temp_path: str) -> None:
|
| 355 |
+
"""
|
| 356 |
+
Atomically replace local database file with validated backup.
|
| 357 |
+
|
| 358 |
+
Uses os.replace() which is atomic on POSIX systems.
|
| 359 |
+
|
| 360 |
+
Args:
|
| 361 |
+
temp_path: Path to validated backup file
|
| 362 |
+
|
| 363 |
+
Raises:
|
| 364 |
+
OSError: If atomic replace fails
|
| 365 |
+
"""
|
| 366 |
+
try:
|
| 367 |
+
logger.info(f"Replacing database: {self.db_path}")
|
| 368 |
+
|
| 369 |
+
# Create backup directory if it doesn't exist
|
| 370 |
+
os.makedirs(os.path.dirname(self.db_path), exist_ok=True)
|
| 371 |
+
|
| 372 |
+
# Atomic replace
|
| 373 |
+
os.replace(temp_path, self.db_path)
|
| 374 |
+
|
| 375 |
+
logger.info("Database replaced successfully")
|
| 376 |
+
|
| 377 |
+
except OSError as e:
|
| 378 |
+
logger.error(f"Failed to replace database: {e}")
|
| 379 |
+
raise
|
| 380 |
+
|
| 381 |
+
|
| 382 |
+
def restore_on_startup() -> RestoreResult:
|
| 383 |
+
"""
|
| 384 |
+
Convenience function to restore database on webapp startup.
|
| 385 |
+
|
| 386 |
+
This function is called from entrypoint.sh before Flask starts.
|
| 387 |
+
|
| 388 |
+
Returns:
|
| 389 |
+
RestoreResult enum indicating the outcome
|
| 390 |
+
"""
|
| 391 |
+
try:
|
| 392 |
+
from .s3_config import S3Config
|
| 393 |
+
|
| 394 |
+
config = S3Config.from_env()
|
| 395 |
+
db_path = os.getenv("DATABASE_PATH", "/app/data/contacts.db")
|
| 396 |
+
|
| 397 |
+
restore_manager = RestoreManager(config, db_path)
|
| 398 |
+
result = restore_manager.restore_from_s3()
|
| 399 |
+
|
| 400 |
+
logger.info(f"Startup restore completed: {result.value}")
|
| 401 |
+
return result
|
| 402 |
+
|
| 403 |
+
except Exception as e:
|
| 404 |
+
logger.error(f"Startup restore failed: {e}", exc_info=True)
|
| 405 |
+
s3_logger.restore_fallback(str(e), "using_local_or_empty_database")
|
| 406 |
+
return RestoreResult.NETWORK_ERROR
|
src/services/storage_service.py
CHANGED
@@ -2,6 +2,20 @@
 SQLite storage service for user profiles and contact sessions.
 Feature: 012-profile-contact-ui
 Feature: 001-refine-memory-producer-logic (producer_id generation)
+Feature: 015-sqlite-s3-backup (S3 backup triggers)
+
+S3 Backup Trigger Policy:
+- Backup IS triggered for:
+  - New user profile creation (create_or_update_user)
+  - New contact session creation (create_contact_session_with_id)
+  - Contact metadata changes (update_contact_session: name/description)
+
+- Backup is NOT triggered for:
+  - User login/last_login updates (create_or_update_user on existing user)
+  - last_interaction timestamp updates (update_contact_last_interaction)
+  - Message count increments or other ephemeral data
+
+This policy optimizes S3 costs by only backing up when critical data changes.
 """
 
 import os
@@ -20,6 +34,37 @@ class NotFoundError(Exception):
     pass
 
 
+# Global S3 backup manager (initialized on first use)
+_backup_manager = None
+
+
+def _get_backup_manager():
+    """
+    Lazy initialization of BackupManager.
+
+    Returns:
+        BackupManager instance or None if S3 is disabled
+    """
+    global _backup_manager
+
+    if _backup_manager is None:
+        try:
+            from .s3_config import S3Config
+            from .s3_backup import BackupManager
+
+            config = S3Config.from_env()
+            db_path = os.getenv("DATABASE_PATH", "data/contacts.db")
+            _backup_manager = BackupManager(config, db_path)
+        except Exception as e:
+            # Log but don't crash if backup initialization fails
+            import logging
+            logger = logging.getLogger(__name__)
+            logger.warning(f"Failed to initialize S3 backup: {e}")
+            _backup_manager = None
+
+    return _backup_manager
+
+
 def get_db_connection() -> sqlite3.Connection:
     """Get SQLite database connection."""
     db_path = os.getenv("DATABASE_PATH", "data/contacts.db")
@@ -136,6 +181,11 @@ def create_or_update_user(
     # Fetch created profile
     cursor.execute("SELECT * FROM user_profiles WHERE user_id = ?", (user_id,))
     row = cursor.fetchone()
+
+    # Trigger S3 backup for new user creation (Feature 015)
+    backup_manager = _get_backup_manager()
+    if backup_manager:
+        backup_manager.request_backup()
 
     conn.close()
 
@@ -235,13 +285,14 @@ def create_contact_session(
     cursor.execute(
         """
         INSERT INTO contact_sessions
-        (session_id, user_id, contact_name, contact_description, is_reference,
+        (session_id, user_id, contact_id, contact_name, contact_description, is_reference,
          created_at, last_interaction, normalized_name, sequence_number, producer_id)
-        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
         """,
         (
            session_id,
            user_id,
+           session_id,  # contact_id defaults to session_id
            contact_name,
            contact_description,
            is_reference,
@@ -262,6 +313,7 @@ def create_contact_session(
     return ContactSession(
         session_id=row["session_id"],
         user_id=row["user_id"],
+        contact_id=row.get("contact_id") or row["session_id"],
        contact_name=row["contact_name"],
        contact_description=row["contact_description"],
        is_reference=bool(row["is_reference"]),
@@ -343,13 +395,14 @@ def create_contact_session_with_id(
     cursor.execute(
         """
         INSERT INTO contact_sessions
-        (session_id, user_id, contact_name, contact_description, is_reference,
+        (session_id, user_id, contact_id, contact_name, contact_description, is_reference,
          created_at, last_interaction, normalized_name, sequence_number, producer_id)
-        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
         """,
         (
            session_id,
            user_id,
+           session_id,  # contact_id defaults to session_id
            contact_name,
            contact_description,
            is_reference,
@@ -366,10 +419,16 @@ def create_contact_session_with_id(
     cursor.execute("SELECT * FROM contact_sessions WHERE session_id = ?", (session_id,))
     row = cursor.fetchone()
     conn.close()
+
+    # Trigger S3 backup for new contact creation (Feature 015)
+    backup_manager = _get_backup_manager()
+    if backup_manager:
+        backup_manager.request_backup()
 
     return ContactSession(
         session_id=row["session_id"],
         user_id=row["user_id"],
+        contact_id=row["contact_id"] if row["contact_id"] else row["session_id"],
        contact_name=row["contact_name"],
        contact_description=row["contact_description"],
        is_reference=bool(row["is_reference"]),
@@ -438,6 +497,7 @@ def list_contact_sessions(
         ContactSession(
             session_id=row["session_id"],
             user_id=row["user_id"],
+            contact_id=safe_row_access(row, "contact_id") or row["session_id"],
            contact_name=row["contact_name"],
            contact_description=row["contact_description"],
            is_reference=bool(row["is_reference"]),
@@ -483,6 +543,7 @@ def get_contact_session(session_id: str) -> Optional[ContactSession]:
     return ContactSession(
         session_id=row["session_id"],
         user_id=row["user_id"],
+        contact_id=row["contact_id"] if "contact_id" in columns else row["session_id"],
        contact_name=row["contact_name"],
        contact_description=row["contact_description"],
        is_reference=bool(row["is_reference"]),
@@ -551,6 +612,12 @@ def update_contact_session(
     query = f"UPDATE contact_sessions SET {', '.join(updates)} WHERE session_id = ?"
     cursor.execute(query, params)
     conn.commit()
+
+    # Trigger S3 backup for critical metadata changes (Feature 015)
+    # Only when name or description changes (not for last_interaction updates)
+    backup_manager = _get_backup_manager()
+    if backup_manager:
+        backup_manager.request_backup()
 
     # Fetch updated session
     cursor.execute("SELECT * FROM contact_sessions WHERE session_id = ?", (session_id,))
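The lazy singleton above means the storage functions never construct BackupManager themselves; when S3 is disabled or misconfigured, _get_backup_manager() returns None and every trigger site becomes a no-op. A minimal sketch of that pattern (names from this diff):

    # Hedged example: how the trigger sites use the backup manager
    from src.services.storage_service import _get_backup_manager

    manager = _get_backup_manager()   # None unless S3 backup is enabled
    if manager:
        manager.request_backup()      # debounced, so repeated calls collapse
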
src/utils/__pycache__/s3_logger.cpython-311.pyc
ADDED
Binary file (6.91 kB)
src/utils/s3_logger.py
ADDED
@@ -0,0 +1,206 @@
+"""
+Structured JSON Logging for S3 Backup/Restore Operations
+
+Provides consistent logging format for observability and debugging.
+All logs are emitted as JSON for easy parsing by log aggregation systems.
+"""
+
+import json
+import logging
+from datetime import datetime
+from typing import Optional, Dict, Any
+
+
+logger = logging.getLogger(__name__)
+
+
+def _log_event(event: str, **kwargs) -> None:
+    """
+    Internal helper to emit structured JSON log messages.
+
+    Args:
+        event: Event name (e.g., "backup_started", "restore_completed")
+        **kwargs: Additional event-specific fields
+    """
+    log_data = {
+        'event': event,
+        'timestamp': datetime.utcnow().isoformat() + 'Z',
+        **kwargs
+    }
+
+    # Mask sensitive data
+    if 's3_key' in log_data:
+        # S3 keys are safe to log
+        pass
+    if 'error' in log_data and isinstance(log_data['error'], Exception):
+        # Convert exception to string
+        log_data['error'] = str(log_data['error'])
+
+    logger.info(json.dumps(log_data))
+
+
+def backup_started(db_path: str, db_size_bytes: int) -> None:
+    """
+    Log that a backup operation has started.
+
+    Args:
+        db_path: Path to the database file being backed up
+        db_size_bytes: Size of the database in bytes
+    """
+    _log_event(
+        'backup_started',
+        db_path=db_path,
+        db_size_bytes=db_size_bytes
+    )
+
+
+def backup_completed(
+    duration_seconds: float,
+    s3_key: str,
+    upload_size_bytes: int
+) -> None:
+    """
+    Log that a backup operation completed successfully.
+
+    Args:
+        duration_seconds: Time taken for the backup operation
+        s3_key: S3 object key where backup was stored
+        upload_size_bytes: Size of uploaded file in bytes
+    """
+    _log_event(
+        'backup_completed',
+        duration_seconds=round(duration_seconds, 2),
+        s3_key=s3_key,
+        upload_size_bytes=upload_size_bytes
+    )
+
+
+def backup_failed(
+    error: str,
+    retry_attempt: Optional[int] = None,
+    max_retries: Optional[int] = None
+) -> None:
+    """
+    Log that a backup operation failed.
+
+    Args:
+        error: Error message or exception
+        retry_attempt: Current retry attempt number (if retrying)
+        max_retries: Maximum number of retries (if retrying)
+    """
+    log_data = {
+        'error': str(error)
+    }
+
+    if retry_attempt is not None:
+        log_data['retry_attempt'] = retry_attempt
+    if max_retries is not None:
+        log_data['max_retries'] = max_retries
+
+    _log_event('backup_failed', **log_data)
+
+
+def restore_started() -> None:
+    """Log that a restore operation has started."""
+    _log_event('restore_started')
+
+
+def restore_completed(
+    duration_seconds: float,
+    s3_key: Optional[str],
+    download_size_bytes: Optional[int],
+    result: str
+) -> None:
+    """
+    Log that a restore operation completed.
+
+    Args:
+        duration_seconds: Time taken for the restore operation
+        s3_key: S3 object key that was restored (None if no restore needed)
+        download_size_bytes: Size of downloaded file (None if no download)
+        result: Restore result enum value (e.g., "restored_from_s3", "local_newer")
+    """
+    log_data = {
+        'duration_seconds': round(duration_seconds, 2),
+        'result': result
+    }
+
+    if s3_key:
+        log_data['s3_key'] = s3_key
+    if download_size_bytes:
+        log_data['download_size_bytes'] = download_size_bytes
+
+    _log_event('restore_completed', **log_data)
+
+
+def restore_fallback(reason: str, fallback_action: str) -> None:
+    """
+    Log that a restore operation fell back to an alternative.
+
+    Args:
+        reason: Reason for fallback (e.g., "validation_failed", "network_error")
+        fallback_action: Action taken (e.g., "using_local_database", "initializing_empty")
+    """
+    _log_event(
+        'restore_fallback',
+        reason=reason,
+        fallback_action=fallback_action
+    )
+
+
+def backup_debounced(pending_requests: int, debounce_seconds: int) -> None:
+    """
+    Log that backup requests are being debounced.
+
+    Args:
+        pending_requests: Number of backup requests collapsed into one
+        debounce_seconds: Debounce period in seconds
+    """
+    _log_event(
+        'backup_debounced',
+        pending_requests=pending_requests,
+        debounce_seconds=debounce_seconds
+    )
+
+
+def s3_credentials_validated(bucket: str, endpoint: Optional[str]) -> None:
+    """
+    Log that S3 credentials were successfully validated.
+
+    Args:
+        bucket: S3 bucket name
+        endpoint: S3 endpoint URL (None for AWS S3)
+    """
+    _log_event(
+        's3_credentials_validated',
+        bucket=bucket,
+        endpoint=endpoint or 'AWS S3'
+    )
+
+
+def s3_credentials_invalid(error: str) -> None:
+    """
+    Log that S3 credentials validation failed.
+
+    Args:
+        error: Error message
+    """
+    _log_event(
+        's3_credentials_invalid',
+        error=str(error)
+    )
+
+
+def backup_skip_reason(reason: str, operation: str) -> None:
+    """
+    Log why a backup was skipped.
+
+    Args:
+        reason: Reason for skipping (e.g., "s3_not_enabled", "non_critical_update")
+        operation: Operation that would have triggered backup
+    """
+    _log_event(
+        'backup_skip',
+        reason=reason,
+        operation=operation
+    )
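For illustration, what one of these helpers emits, a single JSON line on the module logger; the timestamp shown is a placeholder and the other values are illustrative:

    # Hedged example: structured output of backup_completed()
    import logging
    from src.utils import s3_logger

    logging.basicConfig(level=logging.INFO)
    s3_logger.backup_completed(
        duration_seconds=3.214,
        s3_key='contacts-2025-11-27-11-00-00.db',
        upload_size_bytes=1048576,
    )
    # -> {"event": "backup_completed", "timestamp": "2025-11-27T11:00:03.214000Z",
    #     "duration_seconds": 3.21, "s3_key": "contacts-2025-11-27-11-00-00.db",
    #     "upload_size_bytes": 1048576}
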
tests/integration/test_s3_minio.py
ADDED
@@ -0,0 +1,214 @@
+"""
+Integration tests for S3 backup/restore with MinIO.
+
+Feature: 015-sqlite-s3-backup
+
+These tests require a running MinIO instance. They can be run with:
+    docker compose up -d minio
+    pytest webapp/tests/integration/test_s3_minio.py
+"""
+
+import os
+import time
+import tempfile
+import sqlite3
+import pytest
+
+from src.services.s3_config import S3Config
+from src.services.s3_backup import BackupManager
+from src.services.s3_restore import RestoreManager, RestoreResult
+
+
+# Check if MinIO is available
+MINIO_AVAILABLE = os.getenv('MINIO_TEST_ENABLED', 'false').lower() == 'true'
+
+skip_if_no_minio = pytest.mark.skipif(
+    not MINIO_AVAILABLE,
+    reason="MinIO not available - set MINIO_TEST_ENABLED=true to run"
+)
+
+
+@pytest.fixture
+def minio_config():
+    """Create S3Config for local MinIO instance."""
+    return S3Config(
+        enabled=True,
+        bucket='test-prepmate-backups',
+        access_key='minioadmin',
+        secret_key='minioadmin',
+        region='us-east-1',
+        endpoint_url='http://localhost:9000',
+        debounce_seconds=5  # Shorter for tests
+    )
+
+
+@pytest.fixture
+def test_db():
+    """Create a temporary test database."""
+    with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp:
+        db_path = tmp.name
+
+    # Initialize database
+    conn = sqlite3.connect(db_path)
+    conn.execute("CREATE TABLE contacts (id INTEGER PRIMARY KEY, name TEXT)")
+    conn.execute("INSERT INTO contacts VALUES (1, 'Alice')")
+    conn.execute("INSERT INTO contacts VALUES (2, 'Bob')")
+    conn.commit()
+    conn.close()
+
+    yield db_path
+
+    # Cleanup
+    if os.path.exists(db_path):
+        os.unlink(db_path)
+
+
+@skip_if_no_minio
+class TestMinIOIntegration:
+    """Integration tests with MinIO S3-compatible storage."""
+
+    def test_backup_restore_roundtrip(self, minio_config, test_db):
+        """Test complete backup and restore cycle."""
+        # Create backup manager and execute backup
+        backup_manager = BackupManager(minio_config, test_db)
+        success = backup_manager.execute_backup_now()
+
+        assert success is True
+
+        # Verify backup exists in S3
+        metadata = backup_manager.get_latest_backup()
+        assert metadata is not None
+        assert metadata.s3_key.startswith('contacts-')
+        assert metadata.size_bytes > 0
+
+        # Delete local database
+        os.unlink(test_db)
+        assert not os.path.exists(test_db)
+
+        # Restore from S3
+        restore_manager = RestoreManager(minio_config, test_db)
+        result = restore_manager.restore_from_s3()
+
+        assert result == RestoreResult.RESTORED_FROM_S3
+        assert os.path.exists(test_db)
+
+        # Verify restored data
+        conn = sqlite3.connect(test_db)
+        cursor = conn.cursor()
+        cursor.execute("SELECT * FROM contacts ORDER BY id")
+        rows = cursor.fetchall()
+        conn.close()
+
+        assert len(rows) == 2
+        assert rows[0] == (1, 'Alice')
+        assert rows[1] == (2, 'Bob')
+
+    def test_multiple_rapid_creates(self, minio_config, test_db):
+        """Test debouncing with 10 rapid backup requests."""
+        backup_manager = BackupManager(minio_config, test_db)
+
+        # Get initial backup count
+        initial_metadata = backup_manager.get_latest_backup()
+        initial_count = 0 if initial_metadata is None else 1
+
+        # Request 10 rapid backups
+        for i in range(10):
+            backup_manager.request_backup()
+            time.sleep(0.1)  # Small delay between requests
+
+        # Wait for debounce period + execution
+        time.sleep(minio_config.debounce_seconds + 5)
+
+        # Should have only 1-2 new backups due to debouncing
+        # (1 if all collapsed, 2 if one more request came during execution)
+        final_metadata = backup_manager.get_latest_backup()
+        # Just verify at least one backup succeeded
+        assert final_metadata is not None
+
+    def test_restore_on_webapp_startup(self, minio_config, test_db):
+        """Test restore during simulated webapp startup."""
+        # Create a backup
+        backup_manager = BackupManager(minio_config, test_db)
+        backup_manager.execute_backup_now()
+
+        # Modify local database
+        conn = sqlite3.connect(test_db)
+        conn.execute("INSERT INTO contacts VALUES (3, 'Charlie')")
+        conn.commit()
+        conn.close()
+
+        # Simulate startup restore (should keep newer local)
+        restore_manager = RestoreManager(minio_config, test_db)
+        result = restore_manager.restore_from_s3()
+
+        # Local is newer, should not restore
+        assert result == RestoreResult.LOCAL_NEWER
+
+        # Verify Charlie still exists
+        conn = sqlite3.connect(test_db)
+        cursor = conn.cursor()
+        cursor.execute("SELECT COUNT(*) FROM contacts")
+        count = cursor.fetchone()[0]
+        conn.close()
+
+        assert count == 3
+
+    def test_graceful_degradation(self, test_db):
+        """Test webapp continues when S3 is unavailable."""
+        # Create config with wrong endpoint
+        bad_config = S3Config(
+            enabled=True,
+            bucket='test-bucket',
+            access_key='test',
+            secret_key='test',
+            endpoint_url='http://nonexistent:9999'
+        )
+
+        # Backup should fail gracefully
+        backup_manager = BackupManager(bad_config, test_db)
+        success = backup_manager.execute_backup_now()
+
+        assert success is False  # Failed but didn't crash
+
+        # Restore should fall back gracefully
+        restore_manager = RestoreManager(bad_config, test_db)
+        result = restore_manager.restore_from_s3()
+
+        assert result == RestoreResult.NETWORK_ERROR
+
+    def test_performance_budgets(self, minio_config, test_db):
+        """Test backup/restore performance meets targets."""
+        # Backup performance: <10s for 50MB database
+        # (test DB is smaller, so should be much faster)
+        backup_manager = BackupManager(minio_config, test_db)
+
+        start = time.time()
+        success = backup_manager.execute_backup_now()
+        backup_duration = time.time() - start
+
+        assert success is True
+        assert backup_duration < 10, f"Backup took {backup_duration}s (target: <10s)"
+
+        # Restore performance: <15s
+        restore_manager = RestoreManager(minio_config, test_db)
+
+        start = time.time()
+        result = restore_manager.restore_from_s3()
+        restore_duration = time.time() - start
+
+        assert result in [RestoreResult.RESTORED_FROM_S3, RestoreResult.LOCAL_NEWER]
+        assert restore_duration < 15, f"Restore took {restore_duration}s (target: <15s)"
+
+
+if __name__ == '__main__':
+    """
+    Run integration tests with MinIO.
+
+    Usage:
+        # Start MinIO
+        docker compose up -d minio
+
+        # Run tests
+        MINIO_TEST_ENABLED=true pytest webapp/tests/integration/test_s3_minio.py -v
+    """
+    pytest.main([__file__, '-v'])
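Nothing in these fixtures creates the 'test-prepmate-backups' bucket, so if BackupManager does not create it on demand, it can be pre-created once against the local MinIO; a hedged sketch using the same credentials and endpoint as the minio_config fixture:

    # Hedged example: pre-create the fixture bucket on a local MinIO
    import boto3

    s3 = boto3.client(
        's3',
        endpoint_url='http://localhost:9000',
        aws_access_key_id='minioadmin',
        aws_secret_access_key='minioadmin',
        region_name='us-east-1',
    )
    s3.create_bucket(Bucket='test-prepmate-backups')
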
tests/unit/test_s3_backup.py
ADDED
@@ -0,0 +1,241 @@
+"""
+Unit tests for BackupManager module.
+
+Feature: 015-sqlite-s3-backup
+"""
+
+import os
+import time
+import tempfile
+import sqlite3
+import pytest
+from unittest.mock import patch, MagicMock, call
+from datetime import datetime
+
+from src.services.s3_config import S3Config
+from src.services.s3_backup import BackupManager, BackupMetadata
+
+
+class TestBackupManagerInit:
+    """Tests for BackupManager initialization."""
+
+    def test_backup_manager_init_enabled(self):
+        """Test BackupManager initialization with S3 enabled."""
+        config = S3Config(
+            enabled=True,
+            bucket='test-bucket',
+            access_key='test-key',
+            secret_key='test-secret'
+        )
+
+        # Create temp database for testing
+        with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp:
+            db_path = tmp.name
+
+        try:
+            # Initialize database
+            conn = sqlite3.connect(db_path)
+            conn.execute("CREATE TABLE test (id INTEGER)")
+            conn.close()
+
+            # Create manager
+            with patch.object(config, 'create_s3_client'):
+                manager = BackupManager(config, db_path)
+
+                assert manager.config == config
+                assert manager.db_path == db_path
+                assert manager.last_backup_request is None
+        finally:
+            os.unlink(db_path)
+
+    def test_backup_manager_init_disabled(self):
+        """Test BackupManager initialization with S3 disabled."""
+        config = S3Config(enabled=False)
+
+        with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp:
+            db_path = tmp.name
+
+        try:
+            manager = BackupManager(config, db_path)
+
+            assert manager.config == config
+            assert manager.s3_client is None
+        finally:
+            os.unlink(db_path)
+
+    def test_backup_manager_init_missing_db(self):
+        """Test BackupManager initialization with non-existent database."""
+        config = S3Config(enabled=True)
+
+        with pytest.raises(ValueError, match="does not exist"):
+            BackupManager(config, "/nonexistent/path.db")
+
+
+class TestBackupManagerRequestBackup:
+    """Tests for BackupManager.request_backup() method."""
+
+    @patch('threading.Thread')
+    def test_request_backup_debouncing(self, mock_thread):
+        """Test that multiple backup requests are debounced."""
+        config = S3Config(enabled=True, debounce_seconds=5)
+
+        with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp:
+            db_path = tmp.name
+
+        try:
+            conn = sqlite3.connect(db_path)
+            conn.execute("CREATE TABLE test (id INTEGER)")
+            conn.close()
+
+            with patch.object(config, 'create_s3_client'):
+                manager = BackupManager(config, db_path)
+
+                # Request multiple backups
+                manager.request_backup()
+                time1 = manager.last_backup_request
+
+                time.sleep(0.1)
+
+                manager.request_backup()
+                time2 = manager.last_backup_request
+
+                # Second request should update timestamp
+                assert time2 > time1
+
+                # Thread should be started
+                assert mock_thread.called
+        finally:
+            os.unlink(db_path)
+
+
+class TestBackupManagerExecuteBackup:
+    """Tests for BackupManager._execute_backup() method."""
+
+    @patch('boto3.client')
+    def test_execute_backup_success(self, mock_boto_client):
+        """Test successful backup execution."""
+        # Setup mocks
+        mock_s3 = MagicMock()
+        mock_boto_client.return_value = mock_s3
+        mock_s3.upload_fileobj.return_value = None
+
+        config = S3Config(
+            enabled=True,
+            bucket='test-bucket',
+            access_key='test-key',
+            secret_key='test-secret'
+        )
+
+        with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp:
+            db_path = tmp.name
+
+        try:
+            # Create test database
+            conn = sqlite3.connect(db_path)
+            conn.execute("CREATE TABLE test (id INTEGER)")
+            conn.execute("INSERT INTO test VALUES (1)")
+            conn.commit()
+            conn.close()
+
+            # Create manager and execute backup
+            with patch.object(config, 'create_s3_client', return_value=mock_s3):
+                manager = BackupManager(config, db_path)
+                result = manager.execute_backup_now()
+
+                assert result is True
+                # Verify S3 upload was called
+                assert mock_s3.upload_fileobj.called
+        finally:
+            os.unlink(db_path)
+
+    @patch('boto3.client')
+    def test_execute_backup_s3_failure(self, mock_boto_client):
+        """Test backup with S3 upload failure."""
+        # Setup mocks
+        mock_s3 = MagicMock()
+        mock_boto_client.return_value = mock_s3
+
+        from botocore.exceptions import ClientError
+        mock_s3.upload_fileobj.side_effect = ClientError(
+            {'Error': {'Code': 'ServiceUnavailable'}},
+            'UploadFileobj'
+        )
+
+        config = S3Config(
+            enabled=True,
+            bucket='test-bucket',
+            access_key='test-key',
+            secret_key='test-secret'
+        )
+
+        with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp:
+            db_path = tmp.name
+
+        try:
+            conn = sqlite3.connect(db_path)
+            conn.execute("CREATE TABLE test (id INTEGER)")
+            conn.close()
+
+            with patch.object(config, 'create_s3_client', return_value=mock_s3):
+                manager = BackupManager(config, db_path)
+                result = manager.execute_backup_now()
+
+                # Backup should fail gracefully
+                assert result is False
+        finally:
+            os.unlink(db_path)
+
+
+class TestBackupManagerGetLatestBackup:
+    """Tests for BackupManager.get_latest_backup() method."""
+
+    @patch('boto3.client')
+    def test_get_latest_backup_success(self, mock_boto_client):
+        """Test querying latest backup from S3."""
+        mock_s3 = MagicMock()
+        mock_boto_client.return_value = mock_s3
+
+        # Mock S3 response
+        mock_s3.list_objects_v2.return_value = {
+            'Contents': [
+                {
+                    'Key': 'contacts-2025-11-27-10-00-00.db',
+                    'LastModified': datetime(2025, 11, 27, 10, 0, 0),
+                    'Size': 1048576
+                },
+                {
+                    'Key': 'contacts-2025-11-27-11-00-00.db',
+                    'LastModified': datetime(2025, 11, 27, 11, 0, 0),
+                    'Size': 1048576
+                }
+            ]
+        }
+
+        mock_s3.head_object.return_value = {
+            'Metadata': {'sha256': 'abc123'}
+        }
+
+        config = S3Config(
+            enabled=True,
+            bucket='test-bucket',
+            access_key='test-key',
+            secret_key='test-secret'
+        )
+
+        with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp:
+            db_path = tmp.name
+
+        try:
+            conn = sqlite3.connect(db_path)
+            conn.execute("CREATE TABLE test (id INTEGER)")
+            conn.close()
+
+            with patch.object(config, 'create_s3_client', return_value=mock_s3):
+                manager = BackupManager(config, db_path)
+                metadata = manager.get_latest_backup()
+
+                assert metadata is not None
+                assert metadata.s3_key == 'contacts-2025-11-27-11-00-00.db'
+                assert metadata.checksum_sha256 == 'abc123'
+        finally:
+            os.unlink(db_path)
tests/unit/test_s3_config.py
ADDED
@@ -0,0 +1,136 @@
+"""
+Unit tests for S3Config module.
+
+Feature: 015-sqlite-s3-backup
+"""
+
+import os
+import pytest
+from unittest.mock import patch, MagicMock
+
+from src.services.s3_config import (
+    S3Config,
+    S3CredentialsError,
+    S3BucketNotFoundError,
+    S3ConnectionError
+)
+
+
+class TestS3ConfigFromEnv:
+    """Tests for S3Config.from_env() factory method."""
+
+    @patch.dict(os.environ, {
+        'S3_BACKUP_ENABLED': 'true',
+        'S3_BUCKET_NAME': 'test-bucket',
+        'S3_ACCESS_KEY': 'test-access-key',
+        'S3_SECRET_KEY': 'test-secret-key',
+        'S3_REGION': 'us-west-2'
+    })
+    def test_from_env_valid_config(self):
+        """Test loading valid S3 configuration from environment."""
+        config = S3Config.from_env()
+
+        assert config.enabled is True
+        assert config.bucket == 'test-bucket'
+        assert config.access_key == 'test-access-key'
+        assert config.secret_key == 'test-secret-key'
+        assert config.region == 'us-west-2'
+        assert config.upload_timeout == 60
+        assert config.download_timeout == 30
+        assert config.debounce_seconds == 300
+
+    @patch.dict(os.environ, {}, clear=True)
+    def test_from_env_missing_config(self):
+        """Test that missing S3_BACKUP_ENABLED defaults to disabled."""
+        config = S3Config.from_env()
+
+        assert config.enabled is False
+
+    @patch.dict(os.environ, {
+        'S3_BACKUP_ENABLED': 'true',
+        'S3_BUCKET_NAME': 'test-bucket',
+        # Missing S3_ACCESS_KEY and S3_SECRET_KEY
+    }, clear=True)
+    def test_from_env_partial_config(self):
+        """Test that partial configuration disables the feature with warning."""
+        config = S3Config.from_env()
+
+        assert config.enabled is False
+
+
+class TestS3ConfigValidateCredentials:
+    """Tests for S3Config.validate_credentials() method."""
+
+    @patch('boto3.client')
+    def test_validate_credentials_success(self, mock_boto_client):
+        """Test successful credential validation."""
+        # Setup mock
+        mock_s3 = MagicMock()
+        mock_boto_client.return_value = mock_s3
+        mock_s3.head_bucket.return_value = {}
+
+        # Create config
+        config = S3Config(
+            enabled=True,
+            bucket='test-bucket',
+            access_key='test-key',
+            secret_key='test-secret'
+        )
+
+        # Validate
+        result = config.validate_credentials()
+
+        assert result is True
+        mock_s3.head_bucket.assert_called_once_with(Bucket='test-bucket')
+
+    @patch('boto3.client')
+    def test_validate_credentials_bucket_not_found(self, mock_boto_client):
+        """Test validation with non-existent bucket."""
+        # Setup mock
+        mock_s3 = MagicMock()
+        mock_boto_client.return_value = mock_s3
+
+        from botocore.exceptions import ClientError
+        mock_s3.head_bucket.side_effect = ClientError(
+            {'Error': {'Code': '404'}},
+            'HeadBucket'
+        )
+
+        # Create config
+        config = S3Config(
+            enabled=True,
+            bucket='nonexistent-bucket',
+            access_key='test-key',
+            secret_key='test-secret'
+        )
+
+        # Validate
+        result = config.validate_credentials()
+
+        assert result is False
+
+    @patch('boto3.client')
+    def test_validate_credentials_access_denied(self, mock_boto_client):
+        """Test validation with invalid credentials."""
+        # Setup mock
+        mock_s3 = MagicMock()
+        mock_boto_client.return_value = mock_s3
+
+        from botocore.exceptions import ClientError
+        mock_s3.head_bucket.side_effect = ClientError(
+            {'Error': {'Code': '403'}},
+            'HeadBucket'
+        )
+
+        # Create config
+        config = S3Config(
+            enabled=True,
+            bucket='test-bucket',
+            access_key='invalid-key',
+            secret_key='invalid-secret'
+        )
+
+        # Validate
+        result = config.validate_credentials()
+
+        assert result is False
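The from_env() behaviour these tests pin down can be reproduced in-process; a minimal sketch using the same variables TestS3ConfigFromEnv patches (values are placeholders):

    # Hedged example: loading S3Config from environment variables
    import os
    from src.services.s3_config import S3Config

    os.environ.update({
        'S3_BACKUP_ENABLED': 'true',
        'S3_BUCKET_NAME': 'test-bucket',
        'S3_ACCESS_KEY': 'test-access-key',
        'S3_SECRET_KEY': 'test-secret-key',
        'S3_REGION': 'us-west-2',
    })
    config = S3Config.from_env()
    assert config.enabled and config.bucket == 'test-bucket'
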
tests/unit/test_s3_restore.py
ADDED
@@ -0,0 +1,205 @@
+"""
+Unit tests for RestoreManager module.
+
+Feature: 015-sqlite-s3-backup
+"""
+
+import os
+import tempfile
+import sqlite3
+import pytest
+from unittest.mock import patch, MagicMock
+from datetime import datetime, timezone
+
+from src.services.s3_config import S3Config
+from src.services.s3_restore import RestoreManager, RestoreResult
+
+
+class TestRestoreManagerRestoreFromS3:
+    """Tests for RestoreManager.restore_from_s3() method."""
+
+    @patch('boto3.client')
+    def test_restore_from_s3_success(self, mock_boto_client):
+        """Test successful restore from S3."""
+        mock_s3 = MagicMock()
+        mock_boto_client.return_value = mock_s3
+
+        # Mock S3 responses
+        mock_s3.list_objects_v2.return_value = {
+            'Contents': [{
+                'Key': 'contacts-2025-11-27-12-00-00.db',
+                'LastModified': datetime(2025, 11, 27, 12, 0, 0, tzinfo=timezone.utc),
+                'Size': 1048576
+            }]
+        }
+
+        mock_s3.head_object.return_value = {
+            'Metadata': {'sha256': 'abc123'}
+        }
+
+        # Create temp files
+        with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp:
+            db_path = tmp.name
+
+        try:
+            # Create a valid SQLite database for download
+            with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as backup_tmp:
+                backup_path = backup_tmp.name
+
+            conn = sqlite3.connect(backup_path)
+            conn.execute("CREATE TABLE test (id INTEGER)")
+            conn.execute("INSERT INTO test VALUES (1)")
+            conn.commit()
+            conn.close()
+
+            # Mock download to copy the backup file
+            def mock_download(bucket, key, dest):
+                import shutil
+                shutil.copy(backup_path, dest)
+
+            mock_s3.download_file.side_effect = mock_download
+
+            config = S3Config(
+                enabled=True,
+                bucket='test-bucket',
+                access_key='test-key',
+                secret_key='test-secret'
+            )
+
+            with patch.object(config, 'create_s3_client', return_value=mock_s3):
+                manager = RestoreManager(config, db_path)
+                result = manager.restore_from_s3()
+
+                # Should restore successfully
+                assert result == RestoreResult.RESTORED_FROM_S3
+                assert os.path.exists(db_path)
+
+                # Verify database was restored correctly
+                conn = sqlite3.connect(db_path)
+                cursor = conn.cursor()
+                cursor.execute("SELECT * FROM test")
+                rows = cursor.fetchall()
+                conn.close()
+
+                assert len(rows) == 1
+                assert rows[0][0] == 1
+
+            os.unlink(backup_path)
+        finally:
+            if os.path.exists(db_path):
+                os.unlink(db_path)
+
+    @patch('boto3.client')
+    def test_restore_validation_failure(self, mock_boto_client):
+        """Test restore with corrupted backup file."""
+        mock_s3 = MagicMock()
+        mock_boto_client.return_value = mock_s3
+
+        mock_s3.list_objects_v2.return_value = {
+            'Contents': [{
+                'Key': 'contacts-2025-11-27-12-00-00.db',
+                'LastModified': datetime(2025, 11, 27, 12, 0, 0, tzinfo=timezone.utc),
+                'Size': 1048576
+            }]
+        }
+
+        # Mock download to create invalid file
+        def mock_download(bucket, key, dest):
+            with open(dest, 'wb') as f:
+                f.write(b'invalid sqlite data')
+
+        mock_s3.download_file.side_effect = mock_download
+
+        config = S3Config(enabled=True, bucket='test-bucket')
+
+        with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp:
+            db_path = tmp.name
+
+        try:
+            with patch.object(config, 'create_s3_client', return_value=mock_s3):
+                manager = RestoreManager(config, db_path)
+                result = manager.restore_from_s3()
+
+                # Should fail validation and fall back
+                assert result == RestoreResult.VALIDATION_FAILED
+        finally:
+            if os.path.exists(db_path):
+                os.unlink(db_path)
+
+    @patch('boto3.client')
+    def test_restore_no_backup_found(self, mock_boto_client):
+        """Test restore when no backups exist in S3."""
+        mock_s3 = MagicMock()
+        mock_boto_client.return_value = mock_s3
+
+        # No backups in S3
+        mock_s3.list_objects_v2.return_value = {}
+
+        config = S3Config(enabled=True, bucket='test-bucket')
+
+        with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp:
+            db_path = tmp.name
+
+        try:
+            with patch.object(config, 'create_s3_client', return_value=mock_s3):
+                manager = RestoreManager(config, db_path)
+                result = manager.restore_from_s3()
+
+                assert result == RestoreResult.NO_BACKUP_FOUND
+        finally:
+            if os.path.exists(db_path):
+                os.unlink(db_path)
+
+
+class TestRestoreManagerTimestampComparison:
+    """Tests for RestoreManager._compare_timestamps() method."""
+
+    def test_timestamp_comparison_s3_newer(self):
+        """Test timestamp comparison when S3 backup is newer."""
+        config = S3Config(enabled=True)
+
+        with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp:
+            db_path = tmp.name
+
+        try:
+            # Create local file
+            with open(db_path, 'w') as f:
+                f.write('test')
+
+            # Set local mtime to past
+            old_time = datetime(2025, 11, 27, 10, 0, 0).timestamp()
+            os.utime(db_path, (old_time, old_time))
+
+            with patch.object(config, 'create_s3_client'):
+                manager = RestoreManager(config, db_path)
+
+                # S3 timestamp is newer
+                s3_timestamp = datetime(2025, 11, 27, 12, 0, 0, tzinfo=timezone.utc)
+                result = manager._compare_timestamps(db_path, s3_timestamp)
+
+                assert result is True
+        finally:
+            os.unlink(db_path)
+
+    def test_timestamp_comparison_local_newer(self):
+        """Test timestamp comparison when local file is newer."""
+        config = S3Config(enabled=True)
+
+        with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp:
+            db_path = tmp.name
+
+        try:
+            # Create local file with current time
+            with open(db_path, 'w') as f:
+                f.write('test')
+
+            with patch.object(config, 'create_s3_client'):
+                manager = RestoreManager(config, db_path)
+
+                # S3 timestamp is older
+                s3_timestamp = datetime(2025, 11, 27, 10, 0, 0, tzinfo=timezone.utc)
+                result = manager._compare_timestamps(db_path, s3_timestamp)
+
+                assert result is False
+        finally:
+            os.unlink(db_path)
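The rule these timestamp tests pin down can be restated standalone: restore only when the S3 object's LastModified is strictly newer than the local file's mtime. A sketch of that contract, assuming mtimes are normalized to UTC as the tz-aware test values imply (this is a restatement, not the manager's actual implementation):

    # Hedged restatement of the _compare_timestamps() contract
    import os
    from datetime import datetime, timezone

    def s3_is_newer(local_path: str, s3_last_modified: datetime) -> bool:
        local_mtime = datetime.fromtimestamp(
            os.path.getmtime(local_path), tz=timezone.utc)
        return s3_last_modified > local_mtime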