Spaces:
Sleeping
Sleeping
| """Unit tests for Pinecone Pydantic models.""" | |
| from __future__ import annotations | |
| import json | |
| import pytest | |
| from tools.pinecone_models import PineconeRecord, ProcessedStandardSet | |
class TestEducationLevelsProcessing:
    """Tests for the ``education_levels`` value exposed by ``PineconeRecord``.

    Each case builds the same root/leaf record and varies only the
    ``education_levels`` input (plus the set title in one case), then checks
    the list found on the constructed record.
    """

    @staticmethod
    def _record_with_levels(levels, set_title="Grade 1"):
        """Build the shared root/leaf record with the given levels and set title."""
        payload = {
            "_id": "test-id",
            "content": "Test content",
            "standard_set_id": "set-1",
            "standard_set_title": set_title,
            "subject": "Math",
            "education_levels": levels,
            "document_id": "doc-1",
            "document_valid": "2021",
            "jurisdiction_id": "jur-1",
            "jurisdiction_title": "Wyoming",
            "depth": 0,
            "is_leaf": True,
            "is_root": True,
            "root_id": "test-id",
            "ancestor_ids": [],
            "child_ids": [],
            "sibling_count": 0,
        }
        # "_id" is not a valid Python keyword name, so the whole payload is
        # passed by dict expansion.
        return PineconeRecord(**payload)

    def test_simple_array(self):
        """Entries without commas are expected back unchanged."""
        built = self._record_with_levels(["01", "02"])
        assert built.education_levels == ["01", "02"]

    def test_comma_separated_strings(self):
        """Comma-joined entries are expected split, with the repeated "02" kept once."""
        built = self._record_with_levels(["01,02", "02", "03"])
        assert built.education_levels == ["01", "02", "03"]

    def test_high_school_range(self):
        """A single "09,10,11,12" entry is expected to expand to four levels."""
        built = self._record_with_levels(["09,10,11,12"], set_title="High School")
        assert built.education_levels == ["09", "10", "11", "12"]

    def test_empty_array(self):
        """An empty input list is expected to stay empty."""
        built = self._record_with_levels([])
        assert built.education_levels == []

    def test_whitespace_handling(self):
        """Spaces around commas and around whole entries are expected to be stripped."""
        built = self._record_with_levels(["01 , 02", " 03 "])
        assert built.education_levels == ["01", "02", "03"]
class TestParentIdNullHandling:
    """Tests for ``parent_id`` on the record and in its JSON dump."""

    @staticmethod
    def _node(node_id, content, **hierarchy):
        """Build a record from the shared descriptive fields plus ``hierarchy`` keywords."""
        payload = {
            "_id": node_id,
            "content": content,
            "standard_set_id": "set-1",
            "standard_set_title": "Grade 1",
            "subject": "Math",
            "education_levels": ["01"],
            "document_id": "doc-1",
            "document_valid": "2021",
            "jurisdiction_id": "jur-1",
            "jurisdiction_title": "Wyoming",
            **hierarchy,
        }
        return PineconeRecord(**payload)

    @staticmethod
    def _dump_and_reload(record):
        """Serialize ``record`` with ``model_dump_json`` and parse the JSON back."""
        return json.loads(record.model_dump_json())

    def test_root_node_parent_id_null(self):
        """A root built with ``parent_id=None`` keeps None, and dumps it as JSON null."""
        root = self._node(
            "root-id",
            "Root content",
            depth=0,
            is_leaf=False,
            is_root=True,
            parent_id=None,
            root_id="root-id",
            ancestor_ids=[],
            child_ids=["child-1"],
            sibling_count=0,
        )
        assert root.parent_id is None

        # JSON null parses back to None, so the key must be present and null.
        reloaded = self._dump_and_reload(root)
        assert reloaded["parent_id"] is None

    def test_child_node_parent_id_set(self):
        """A child built with a parent id exposes it on the record and in the JSON dump."""
        child = self._node(
            "child-id",
            "Child content",
            depth=1,
            is_leaf=True,
            is_root=False,
            parent_id="parent-id",
            root_id="root-id",
            ancestor_ids=["root-id"],
            child_ids=[],
            sibling_count=0,
        )
        assert child.parent_id == "parent-id"

        # The same value must survive the JSON round trip.
        reloaded = self._dump_and_reload(child)
        assert reloaded["parent_id"] == "parent-id"
class TestOptionalFields:
    """Tests for the optional ``PineconeRecord`` fields, both omitted and supplied."""

    @staticmethod
    def _descriptive_fields():
        """Return a fresh dict of the descriptive keywords both tests share."""
        return {
            "_id": "test-id",
            "content": "Test content",
            "standard_set_id": "set-1",
            "standard_set_title": "Grade 1",
            "subject": "Math",
            "education_levels": ["01"],
            "document_id": "doc-1",
            "document_valid": "2021",
            "jurisdiction_id": "jur-1",
            "jurisdiction_title": "Wyoming",
        }

    def test_all_optional_fields_omitted(self):
        """Each optional field reads as None when it is not passed in."""
        bare = PineconeRecord(
            **self._descriptive_fields(),
            depth=0,
            is_leaf=True,
            is_root=True,
            root_id="test-id",
            ancestor_ids=[],
            child_ids=[],
            sibling_count=0,
        )
        omitted = (
            "normalized_subject",
            "asn_identifier",
            "statement_notation",
            "statement_label",
            "publication_status",
        )
        for field_name in omitted:
            assert getattr(bare, field_name) is None

    def test_optional_fields_set(self):
        """Each optional field reads back the value it was given."""
        supplied = {
            "normalized_subject": "Math",
            "asn_identifier": "S12345",
            "statement_notation": "1.G.K",
            "statement_label": "Standard",
            "publication_status": "Published",
        }
        populated = PineconeRecord(
            **self._descriptive_fields(),
            **supplied,
            depth=1,
            is_leaf=True,
            is_root=False,
            parent_id="parent-id",
            root_id="root-id",
            ancestor_ids=["root-id"],
            child_ids=[],
            sibling_count=1,
        )
        for field_name, expected in supplied.items():
            assert getattr(populated, field_name) == expected
class TestProcessedStandardSet:
    """Tests for the ``ProcessedStandardSet`` container of records."""

    @staticmethod
    def _make_record(record_id, content, **hierarchy):
        """Build a record from the shared descriptive fields plus ``hierarchy`` keywords."""
        payload = {
            "_id": record_id,
            "content": content,
            "standard_set_id": "set-1",
            "standard_set_title": "Grade 1",
            "subject": "Math",
            "education_levels": ["01"],
            "document_id": "doc-1",
            "document_valid": "2021",
            "jurisdiction_id": "jur-1",
            "jurisdiction_title": "Wyoming",
            **hierarchy,
        }
        return PineconeRecord(**payload)

    def test_empty_records(self):
        """A container built from an empty list exposes an empty ``records`` list."""
        container = ProcessedStandardSet(records=[])
        assert container.records == []

    def test_multiple_records(self):
        """Two records are kept, in the order they were given."""
        parent = self._make_record(
            "id-1",
            "Content 1",
            depth=0,
            is_leaf=True,
            is_root=True,
            root_id="id-1",
            ancestor_ids=[],
            child_ids=[],
            sibling_count=0,
        )
        child = self._make_record(
            "id-2",
            "Content 2",
            depth=1,
            is_leaf=True,
            is_root=False,
            parent_id="id-1",
            root_id="id-1",
            ancestor_ids=["id-1"],
            child_ids=[],
            sibling_count=0,
        )
        container = ProcessedStandardSet(records=[parent, child])
        assert len(container.records) == 2
        assert [entry.id for entry in container.records] == ["id-1", "id-2"]

    def test_json_serialization(self):
        """An alias-keyed JSON dump holds the record under ``records`` with ``_id`` set."""
        only = self._make_record(
            "test-id",
            "Test content",
            depth=0,
            is_leaf=True,
            is_root=True,
            root_id="test-id",
            ancestor_ids=[],
            child_ids=[],
            sibling_count=0,
        )
        container = ProcessedStandardSet(records=[only])
        decoded = json.loads(container.model_dump_json(by_alias=True))

        assert "records" in decoded
        dumped_records = decoded["records"]
        assert len(dumped_records) == 1
        first = dumped_records[0]
        assert first["_id"] == "test-id"
        # parent_id was never passed; the dump must still carry it as null.
        assert first["parent_id"] is None