| """CLI entry point for EduMatch Data Management.""" | |
| from __future__ import annotations | |
| import sys | |
| from pathlib import Path | |
| # Add project root to Python path | |
| project_root = Path(__file__).parent.parent | |
| if str(project_root) not in sys.path: | |
| sys.path.insert(0, str(project_root)) | |
| import typer | |
| from loguru import logger | |
| from rich.console import Console | |
| from rich.table import Table | |
| from tools import api_client, data_manager | |
| from tools.config import get_settings | |
| from tools.pinecone_processor import process_and_save | |
| settings = get_settings() | |
| # Configure logger | |
| logger.remove() # Remove default handler | |
| logger.add( | |
| sys.stderr, | |
| format="<green>{time:HH:mm:ss}</green> | <level>{level: <8}</level> | <level>{message}</level>", | |
| ) | |
| logger.add( | |
| settings.log_file, | |
| rotation=settings.log_rotation, | |
| retention=settings.log_retention, | |
| format="{time} | {level} | {message}", | |
| ) | |
| app = typer.Typer(help="Common Core MCP CLI - Manage educational standards data") | |
| console = Console() | |
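
# The functions below are registered on `app`; Typer derives each CLI command
# name from the function name (underscores become dashes, e.g.
# download_sets -> download-sets). Typical usage, assuming this file is saved
# as manage.py (hypothetical name; adjust to your layout):
#   python manage.py --help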


@app.command()
def jurisdictions(
    search: str = typer.Option(
        None,
        "--search",
        "-s",
        help="Filter by jurisdiction name (case-insensitive partial match)",
    ),
    type: str = typer.Option(
        None,
        "--type",
        "-t",
        help="Filter by jurisdiction type: school, organization, state, or nation",
    ),
    force: bool = typer.Option(
        False, "--force", "-f", help="Force refresh from API, ignoring local cache"
    ),
):
| """ | |
| List all available jurisdictions (states/organizations). | |
| By default, jurisdictions are loaded from local cache (data/raw/jurisdictions.json) | |
| to avoid repeated API calls. Use --force to fetch fresh data from the API and update | |
| the cache. The cache is automatically created on first use. | |
| Filters can be combined: use --search to filter by name and --type to filter by type. | |
| """ | |
    try:
        if force:
            console.print("[yellow]Forcing refresh from API...[/yellow]")

        # Validate type filter if provided
        if type:
            valid_types = {"school", "organization", "state", "nation"}
            if type.lower() not in valid_types:
                console.print(
                    f"[red]Error: Invalid type '{type}'. Must be one of: {', '.join(sorted(valid_types))}[/red]"
                )
                raise typer.Exit(code=1)

        results = api_client.get_jurisdictions(
            search_term=search, type_filter=type, force_refresh=force
        )

        table = Table("ID", "Title", "Type", title="Jurisdictions")
        for j in results:
            table.add_row(j.id, j.title, j.type)
        console.print(table)

        console.print(f"\n[green]Found {len(results)} jurisdictions[/green]")
        if not force:
            console.print("[dim]Tip: Use --force to refresh from API[/dim]")
    except Exception as e:
        console.print(f"[red]Error: {e}[/red]")
        logger.exception("Failed to fetch jurisdictions")
        raise typer.Exit(code=1)


@app.command()
def jurisdiction_details(
    jurisdiction_id: str = typer.Argument(..., help="Jurisdiction ID"),
    force: bool = typer.Option(
        False, "--force", "-f", help="Force refresh from API, ignoring local cache"
    ),
):
| """ | |
| Download and display jurisdiction metadata including standard set references. | |
| By default, jurisdiction metadata is loaded from local cache (data/raw/jurisdictions/{id}/data.json) | |
| to avoid repeated API calls. Use --force to fetch fresh data from the API and update the cache. | |
| The cache is automatically created on first use. | |
| Note: This command downloads metadata about standard sets (IDs, titles, subjects) but NOT | |
| the full standard set content. Use the 'download' command to get full standard set data. | |
| """ | |
    try:
        if force:
            console.print("[yellow]Forcing refresh from API...[/yellow]")

        jurisdiction_data = api_client.get_jurisdiction_details(
            jurisdiction_id, force_refresh=force
        )

        # Display jurisdiction info
        console.print(f"\n[bold]Jurisdiction:[/bold] {jurisdiction_data.title}")
        console.print(f"[bold]Type:[/bold] {jurisdiction_data.type}")
        console.print(f"[bold]ID:[/bold] {jurisdiction_data.id}")

        # Display standard sets
        standard_sets = jurisdiction_data.standardSets
        if standard_sets:
            table = Table(
                "Set ID", "Subject", "Title", "Grade Levels", title="Standard Sets"
            )
            for s in standard_sets:
                grade_levels = ", ".join(s.educationLevels)
                table.add_row(
                    s.id,
                    s.subject,
                    s.title,
                    grade_levels or "N/A",
                )
            console.print("\n")
            console.print(table)
            console.print(f"\n[green]Found {len(standard_sets)} standard sets[/green]")
        else:
            console.print("\n[yellow]No standard sets found[/yellow]")

        if not force:
            console.print("[dim]Tip: Use --force to refresh from API[/dim]")
    except Exception as e:
        console.print(f"[red]Error: {e}[/red]")
        logger.exception("Failed to fetch jurisdiction details")
        raise typer.Exit(code=1)


@app.command()
def download_sets(
    set_id: str = typer.Argument(None, help="Standard set ID (if downloading by ID)"),
    jurisdiction: str = typer.Option(
        None,
        "--jurisdiction",
        "-j",
        help="Jurisdiction ID (if downloading by jurisdiction)",
    ),
    force: bool = typer.Option(
        False, "--force", "-f", help="Force refresh from API, ignoring local cache"
    ),
    yes: bool = typer.Option(
        False,
        "--yes",
        "-y",
        help="Skip confirmation prompt when downloading by jurisdiction",
    ),
    dry_run: bool = typer.Option(
        False,
        "--dry-run",
        help="Show what would be downloaded without actually downloading",
    ),
    education_levels: str = typer.Option(
        None,
        "--education-levels",
        help="Comma-separated grade levels (e.g., '03,04,05')",
    ),
    publication_status: str = typer.Option(
        None,
        "--publication-status",
        help="Publication status filter (e.g., 'Published', 'Deprecated')",
    ),
    valid_year: str = typer.Option(
        None, "--valid-year", help="Valid year filter (e.g., '2012')"
    ),
    title: str = typer.Option(
        None, "--title", help="Partial title match (case-insensitive)"
    ),
    subject: str = typer.Option(
        None, "--subject", help="Partial subject match (case-insensitive)"
    ),
):
| """ | |
| Download standard sets either by ID or by jurisdiction with filtering. | |
| When downloading by jurisdiction, filters can be applied and all filters combine with AND logic. | |
| A confirmation prompt will be shown listing all standard sets that will be downloaded. | |
| Use --dry-run to preview what would be downloaded without actually downloading anything. | |
| """ | |
    try:
        # Validate arguments
        if not set_id and not jurisdiction:
            console.print(
                "[red]Error: Must provide either set_id or --jurisdiction[/red]"
            )
            raise typer.Exit(code=1)
        if set_id and jurisdiction:
            console.print(
                "[red]Error: Cannot specify both set_id and --jurisdiction[/red]"
            )
            raise typer.Exit(code=1)

        # Download by ID
        if set_id:
            if dry_run:
                console.print(
                    f"[yellow][DRY RUN] Would download standard set: {set_id}[/yellow]"
                )
                cache_path = Path("data/raw/standardSets") / set_id / "data.json"
                console.print(f" Would cache to: {cache_path}")
                return

            with console.status(f"[bold blue]Downloading standard set {set_id}..."):
                api_client.download_standard_set(set_id, force_refresh=force)

            cache_path = Path("data/raw/standardSets") / set_id / "data.json"
            console.print("[green]✓ Successfully downloaded standard set[/green]")
            console.print(f" Cached to: {cache_path}")

            # Process the downloaded set
            try:
                with console.status(f"[bold blue]Processing standard set {set_id}..."):
                    processed_path = process_and_save(set_id)
                console.print("[green]✓ Successfully processed standard set[/green]")
                console.print(f" Processed to: {processed_path}")
            except FileNotFoundError:
                console.print(
                    "[yellow]Warning: data.json not found, skipping processing[/yellow]"
                )
            except Exception as e:
                console.print(
                    f"[yellow]Warning: Failed to process standard set: {e}[/yellow]"
                )
                logger.exception(f"Failed to process standard set {set_id}")
            return

        # Download by jurisdiction
        if jurisdiction:
            # Parse education levels
            education_levels_list = None
            if education_levels:
                education_levels_list = [
                    level.strip() for level in education_levels.split(",")
                ]
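            # e.g. "03, 04,05" -> ["03", "04", "05"] (whitespace around entries is stripped)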
            # Get jurisdiction metadata
            jurisdiction_data = api_client.get_jurisdiction_details(
                jurisdiction, force_refresh=False
            )
            all_sets = jurisdiction_data.standardSets

            # Apply filters using the API client's filter function
            from tools.api_client import _filter_standard_set

            filtered_sets = [
                s
                for s in all_sets
                if _filter_standard_set(
                    s,
                    education_levels=education_levels_list,
                    publication_status=publication_status,
                    valid_year=valid_year,
                    title_search=title,
                    subject_search=subject,
                )
            ]

            if not filtered_sets:
                console.print(
                    "[yellow]No standard sets match the provided filters.[/yellow]"
                )
                return

            # Display filtered sets
            if dry_run:
                console.print(
                    f"\n[yellow][DRY RUN] Standard sets that would be downloaded ({len(filtered_sets)}):[/yellow]"
                )
            else:
                console.print(
                    f"\n[bold]Standard sets to download ({len(filtered_sets)}):[/bold]"
                )

            table = Table(
                "Set ID",
                "Subject",
                "Title",
                "Grade Levels",
                "Status",
                "Year",
                "Downloaded",
                title="Standard Sets",
            )
            for s in filtered_sets:
                display_id = s.id[:20] + "..." if len(s.id) > 20 else s.id
                # Check if already downloaded
                set_data_path = settings.standard_sets_dir / s.id / "data.json"
                is_downloaded = set_data_path.exists()
                downloaded_status = (
                    "[green]✓[/green]" if is_downloaded else "[yellow]✗[/yellow]"
                )
                table.add_row(
                    display_id,
                    s.subject,
                    s.title[:40],
                    ", ".join(s.educationLevels),
                    s.document.publicationStatus or "N/A",
                    s.document.valid,
                    downloaded_status,
                )
            console.print(table)

            # If dry run, show summary and exit
            if dry_run:
                console.print(
                    f"\n[yellow][DRY RUN] Would download {len(filtered_sets)} standard set(s)[/yellow]"
                )
                console.print(
                    "[dim]Run without --dry-run to actually download these standard sets.[/dim]"
                )
                return

            # Confirmation prompt
            if not yes:
                if not typer.confirm(
                    f"\nDownload {len(filtered_sets)} standard set(s)?"
                ):
                    console.print("[yellow]Download cancelled.[/yellow]")
                    return

            # Download each standard set
            console.print(
                f"\n[bold blue]Downloading {len(filtered_sets)} standard set(s)...[/bold blue]"
            )
            downloaded = 0
            failed = 0
            for i, standard_set in enumerate(filtered_sets, 1):
                set_id = standard_set.id
                try:
                    with console.status(
                        f"[bold blue][{i}/{len(filtered_sets)}] Downloading {set_id[:20]}..."
                    ):
                        api_client.download_standard_set(set_id, force_refresh=force)
                    downloaded += 1

                    # Process the downloaded set
                    try:
                        with console.status(
                            f"[bold blue][{i}/{len(filtered_sets)}] Processing {set_id[:20]}..."
                        ):
                            process_and_save(set_id)
                    except FileNotFoundError:
                        console.print(
                            f"[yellow]Warning: Skipping processing for {set_id[:20]}... (data.json not found)[/yellow]"
                        )
                    except Exception as e:
                        console.print(
                            f"[yellow]Warning: Failed to process {set_id[:20]}...: {e}[/yellow]"
                        )
                        logger.exception(f"Failed to process standard set {set_id}")
                except Exception as e:
                    console.print(f"[red]✗ Failed to download {set_id}: {e}[/red]")
                    logger.exception(f"Failed to download standard set {set_id}")
                    failed += 1

            # Summary
            console.print(
                f"\n[green]✓ Successfully downloaded {downloaded} standard set(s)[/green]"
            )
            if failed > 0:
                console.print(
                    f"[red]✗ Failed to download {failed} standard set(s)[/red]"
                )
    except Exception as e:
        console.print(f"[red]Error: {e}[/red]")
        logger.exception("Failed to download standard sets")
        raise typer.Exit(code=1)


@app.command()
def list_datasets():
    """List all downloaded standard sets and their processing status."""
    try:
        datasets = data_manager.list_downloaded_standard_sets()
        if not datasets:
            console.print("[yellow]No standard sets downloaded yet.[/yellow]")
            console.print("[dim]Use 'download-sets' to download standard sets.[/dim]")
            return

        # Check for processed.json files
        for d in datasets:
            set_dir = settings.standard_sets_dir / d.set_id
            processed_file = set_dir / "processed.json"
            d.processed = processed_file.exists()

        # Count processed vs unprocessed
        processed_count = sum(1 for d in datasets if d.processed)
        unprocessed_count = len(datasets) - processed_count

        table = Table(
            "Set ID",
            "Jurisdiction",
            "Subject",
            "Title",
            "Grades",
            "Status",
            "Processed",
            title="Downloaded Standard Sets",
        )
        for d in datasets:
            # Truncate long set IDs
            display_id = d.set_id[:25] + "..." if len(d.set_id) > 25 else d.set_id
            table.add_row(
                display_id,
                d.jurisdiction,
                d.subject[:30],
                d.title[:30],
                ", ".join(d.education_levels),
                d.publication_status,
                "[green]✓[/green]" if d.processed else "[yellow]✗[/yellow]",
            )
        console.print(table)

        console.print("\n[bold]Summary:[/bold]")
        console.print(f" Total: {len(datasets)} standard sets")
        console.print(f" Processed: [green]{processed_count}[/green]")
        console.print(f" Unprocessed: [yellow]{unprocessed_count}[/yellow]")
    except Exception as e:
        console.print(f"[red]Error: {e}[/red]")
        logger.exception("Failed to list datasets")
        raise typer.Exit(code=1)


@app.command()
def pinecone_init():
    """
    Initialize Pinecone index.

    Checks if the configured index exists and creates it if not.
    Uses integrated embeddings with the llama-text-embed-v2 model.
    """
    try:
        from src.pinecone_client import PineconeClient

        console.print("[bold]Initializing Pinecone...[/bold]")

        # Initialize Pinecone client (validates API key)
        try:
            client = PineconeClient()
        except ValueError as e:
            console.print(f"[red]Error: {e}[/red]")
            raise typer.Exit(code=1)

        console.print(f" Index name: [cyan]{client.index_name}[/cyan]")
        console.print(f" Namespace: [cyan]{client.namespace}[/cyan]")

        # Check and create index if needed
        with console.status("[bold blue]Checking index status..."):
            created = client.ensure_index_exists()

        if created:
            console.print(
                f"\n[green]Successfully created index '{client.index_name}'[/green]"
            )
            console.print("[dim]Index configuration:[/dim]")
            console.print(" Cloud: aws")
            console.print(" Region: us-east-1")
            console.print(" Embedding model: llama-text-embed-v2")
            console.print(" Field map: text -> content")
        else:
            console.print(
                f"\n[green]Index '{client.index_name}' already exists[/green]"
            )

        # Show index stats
        with console.status("[bold blue]Fetching index stats..."):
            stats = client.get_index_stats()

        console.print("\n[bold]Index Statistics:[/bold]")
        console.print(f" Total vectors: [cyan]{stats['total_vector_count']}[/cyan]")
        namespaces = stats.get("namespaces", {})
        if namespaces:
            console.print(f" Namespaces: [cyan]{len(namespaces)}[/cyan]")
            table = Table("Namespace", "Vector Count", title="Namespace Details")
            for ns_name, ns_info in namespaces.items():
                vector_count = getattr(ns_info, "vector_count", 0)
                table.add_row(ns_name or "(default)", str(vector_count))
            console.print(table)
        else:
            console.print(" Namespaces: [yellow]None (empty index)[/yellow]")
    except Exception as e:
        console.print(f"[red]Error: {e}[/red]")
        logger.exception("Failed to initialize Pinecone")
        raise typer.Exit(code=1)


@app.command()
def pinecone_upload(
    set_id: str = typer.Option(
        None, "--set-id", help="Upload a specific standard set by ID"
    ),
    all: bool = typer.Option(
        False, "--all", help="Upload all downloaded standard sets with processed.json"
    ),
    force: bool = typer.Option(
        False,
        "--force",
        help="Re-upload even if .pinecone_uploaded marker exists",
    ),
    dry_run: bool = typer.Option(
        False,
        "--dry-run",
        help="Show what would be uploaded without actually uploading",
    ),
    batch_size: int = typer.Option(
        96, "--batch-size", help="Number of records per batch (default: 96)"
    ),
):
| """ | |
| Upload processed standard sets to Pinecone. | |
| Use --set-id to upload a specific set, or --all to upload all sets with processed.json. | |
| If neither is provided, you'll be prompted to confirm uploading all sets. | |
| """ | |
    try:
        import json

        from src.pinecone_client import PineconeClient
        from tools.pinecone_models import ProcessedStandardSet

        # Initialize Pinecone client
        try:
            client = PineconeClient()
        except ValueError as e:
            console.print(f"[red]Error: {e}[/red]")
            raise typer.Exit(code=1)

        # Validate index exists
        try:
            client.validate_index()
        except ValueError as e:
            console.print(f"[red]Error: {e}[/red]")
            raise typer.Exit(code=1)

        # Discover standard sets with processed.json
        standard_sets_dir = settings.standard_sets_dir
        if not standard_sets_dir.exists():
            console.print("[yellow]No standard sets directory found.[/yellow]")
            console.print(
                "[dim]Use 'download-sets' to download standard sets first.[/dim]"
            )
            return

        # Find all sets with processed.json
        sets_to_upload = []
        for set_dir in standard_sets_dir.iterdir():
            if not set_dir.is_dir():
                continue
            processed_file = set_dir / "processed.json"
            if not processed_file.exists():
                continue
            set_id_from_dir = set_dir.name
            # Record upload status during discovery (unless --force);
            # filtering by --set-id happens later
            already_uploaded = not force and PineconeClient.is_uploaded(set_dir)
            sets_to_upload.append((set_id_from_dir, set_dir, already_uploaded))
        if not sets_to_upload:
            console.print(
                "[yellow]No standard sets with processed.json found.[/yellow]"
            )
            console.print(
                "[dim]Use 'download-sets' to download and process standard sets first.[/dim]"
            )
            return

        # Filter by --set-id if provided
        if set_id:
            sets_to_upload = [
                (sid, sdir, skipped)
                for sid, sdir, skipped in sets_to_upload
                if sid == set_id
            ]
            if not sets_to_upload:
                console.print(
                    f"[yellow]Standard set '{set_id}' not found or has no processed.json.[/yellow]"
                )
                return

        # If neither --set-id nor --all provided, prompt for confirmation
        if not set_id and not all:
            console.print(
                f"\n[bold]Found {len(sets_to_upload)} standard set(s) with processed.json:[/bold]"
            )
            table = Table("Set ID", "Status", title="Standard Sets")
            for sid, sdir, skipped in sets_to_upload:
                status = (
                    "[yellow]Already uploaded[/yellow]"
                    if skipped
                    else "[green]Ready[/green]"
                )
                table.add_row(sid, status)
            console.print(table)
            if not typer.confirm(
                f"\nUpload {len(sets_to_upload)} standard set(s) to Pinecone?"
            ):
                console.print("[yellow]Upload cancelled.[/yellow]")
                return

        # Show what would be uploaded (dry-run or preview)
        if dry_run or not all:
            console.print(
                f"\n[bold]Standard sets to upload ({len(sets_to_upload)}):[/bold]"
            )
            table = Table("Set ID", "Records", "Status", title="Upload Preview")
            for sid, sdir, skipped in sets_to_upload:
                if skipped and not force:
                    table.add_row(
                        sid, "N/A", "[yellow]Skipped (already uploaded)[/yellow]"
                    )
                    continue
                # Load processed.json to count records
                try:
                    with open(sdir / "processed.json", encoding="utf-8") as f:
                        processed_data = json.load(f)
                    record_count = len(processed_data.get("records", []))
                    status = (
                        "[green]Ready[/green]"
                        if not dry_run
                        else "[yellow]Would upload[/yellow]"
                    )
                    table.add_row(sid, str(record_count), status)
                except Exception as e:
                    table.add_row(sid, "Error", f"[red]Failed to read: {e}[/red]")
            console.print(table)

            if dry_run:
                console.print(
                    f"\n[yellow][DRY RUN] Would upload {len([s for s in sets_to_upload if not s[2] or force])} standard set(s)[/yellow]"
                )
                console.print("[dim]Run without --dry-run to actually upload.[/dim]")
                return

        # Perform uploads
        uploaded_count = 0
        failed_count = 0
        skipped_count = 0
        for i, (sid, sdir, already_uploaded) in enumerate(sets_to_upload, 1):
            if already_uploaded and not force:
                skipped_count += 1
                continue
            try:
                # Load processed.json
                with open(sdir / "processed.json", encoding="utf-8") as f:
                    processed_data = json.load(f)
                processed_set = ProcessedStandardSet(**processed_data)
                records = processed_set.records

                if not records:
                    console.print(f"[yellow]Skipping {sid} (no records)[/yellow]")
                    skipped_count += 1
                    continue

                # Upload records
                with console.status(
                    f"[bold blue][{i}/{len(sets_to_upload)}] Uploading {sid} ({len(records)} records)"
                ):
                    client.batch_upsert(records, batch_size=batch_size)

                # Mark as uploaded
                PineconeClient.mark_uploaded(sdir)
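                # mark_uploaded is assumed to write the .pinecone_uploaded marker
                # in the set directory; --force bypasses the is_uploaded check above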
                uploaded_count += 1
                console.print(
                    f"[green]✓ [{i}/{len(sets_to_upload)}] Uploaded {sid} ({len(records)} records)[/green]"
                )
            except FileNotFoundError:
                console.print(
                    f"[red]✗ [{i}/{len(sets_to_upload)}] Failed: {sid} (processed.json not found)[/red]"
                )
                logger.exception(f"Failed to upload standard set {sid}")
                failed_count += 1
            except Exception as e:
                console.print(
                    f"[red]✗ [{i}/{len(sets_to_upload)}] Failed: {sid} ({e})[/red]"
                )
                logger.exception(f"Failed to upload standard set {sid}")
                failed_count += 1

        # Summary
        console.print("\n[bold]Upload Summary:[/bold]")
        console.print(f" Uploaded: [green]{uploaded_count}[/green]")
        if skipped_count > 0:
            console.print(f" Skipped: [yellow]{skipped_count}[/yellow]")
        if failed_count > 0:
            console.print(f" Failed: [red]{failed_count}[/red]")
    except Exception as e:
        console.print(f"[red]Error: {e}[/red]")
        logger.exception("Failed to upload to Pinecone")
        raise typer.Exit(code=1)


if __name__ == "__main__":
    app()