Spaces:
Sleeping
Sleeping
File size: 2,352 Bytes
7602502 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
"""Manages local data storage and metadata tracking."""
from __future__ import annotations
import json
from dataclasses import dataclass
from loguru import logger
from tools.config import get_settings
from tools.models import StandardSetResponse
settings = get_settings()

# Data directories, all resolved once at import time from the loaded settings
RAW_DATA_DIR, STANDARD_SETS_DIR, PROCESSED_DATA_DIR = (
    settings.raw_data_dir,
    settings.standard_sets_dir,
    settings.processed_data_dir,
)
@dataclass
class StandardSetInfo:
    """Summary record for one downloaded standard set.

    Carries the descriptive metadata of a standard set together with a flag
    telling whether the set has been processed.
    """

    # Identifier of the standard set
    set_id: str
    # Human-readable title of the standard set
    title: str
    # Subject the standard set belongs to
    subject: str
    # Education levels covered by the standard set
    education_levels: list[str]
    # Jurisdiction name (list_downloaded_standard_sets stores the
    # jurisdiction's title here)
    jurisdiction: str
    # Publication status (list_downloaded_standard_sets falls back to
    # "Unknown" when the source document has none)
    publication_status: str
    # Value of the source document's "valid" field
    valid_year: str
    # Whether the standard set has been processed
    processed: bool
def list_downloaded_standard_sets() -> list[StandardSetInfo]:
    """
    List all downloaded standard sets from the standardSets directory.

    Every subdirectory of ``STANDARD_SETS_DIR`` that contains a ``data.json``
    file is parsed as a ``StandardSetResponse`` and summarized as a
    ``StandardSetInfo``. Entries that are not directories, or that have no
    ``data.json``, are skipped silently. Files that cannot be read, decoded,
    or validated are skipped with a warning so that one bad download does not
    hide the others.

    Returns:
        List of StandardSetInfo with standard set info and processing status,
        ordered by directory path. Empty when ``STANDARD_SETS_DIR`` does not
        exist. ``processed`` is currently always ``False`` (see TODO below).
    """
    if not STANDARD_SETS_DIR.exists():
        return []

    datasets: list[StandardSetInfo] = []

    # iterdir() yields entries in arbitrary order; sort so the result is
    # deterministic across runs and platforms.
    for set_dir in sorted(STANDARD_SETS_DIR.iterdir()):
        if not set_dir.is_dir():
            continue

        data_file = set_dir / "data.json"
        if not data_file.exists():
            continue

        try:
            with open(data_file, encoding="utf-8") as f:
                raw_data = json.load(f)

            # Parse the API response wrapper
            response = StandardSetResponse(**raw_data)
            standard_set = response.data

            # Build the dataset info
            dataset_info = StandardSetInfo(
                set_id=standard_set.id,
                title=standard_set.title,
                subject=standard_set.subject,
                education_levels=standard_set.educationLevels,
                jurisdiction=standard_set.jurisdiction.title,
                publication_status=standard_set.document.publicationStatus or "Unknown",
                valid_year=standard_set.document.valid,
                processed=False,  # TODO: Check against processed directory
            )
        except Exception as e:
            # Deliberately broad: this is a best-effort scan, so read errors,
            # malformed JSON, and model/attribute errors are all logged and
            # skipped. (JSONDecodeError and IOError are already subclasses of
            # Exception, so listing them separately was redundant.)
            logger.warning(f"Failed to read {data_file}: {e}")
            continue
        else:
            datasets.append(dataset_info)

    logger.debug(f"Found {len(datasets)} downloaded standard sets")
    return datasets
|