From f3186acd5631f8a11e4df2c70747d94b328d1445 Mon Sep 17 00:00:00 2001 From: AmirLayegh Date: Thu, 11 Dec 2025 14:42:44 +0100 Subject: [PATCH 1/7] feat: add ConstraintType to GraphSchema for constraint extraction --- .../experimental/components/schema.py | 81 +++++++++++++++++++ src/neo4j_graphrag/generation/prompts.py | 18 ++++- 2 files changed, 96 insertions(+), 3 deletions(-) diff --git a/src/neo4j_graphrag/experimental/components/schema.py b/src/neo4j_graphrag/experimental/components/schema.py index 25ae85e75..bb41f6de7 100644 --- a/src/neo4j_graphrag/experimental/components/schema.py +++ b/src/neo4j_graphrag/experimental/components/schema.py @@ -161,6 +161,22 @@ def property_type_from_name(self, name: str) -> Optional[PropertyType]: return None +class ConstraintType(BaseModel): + """ + Represents a constraint on a node in the graph. + """ + + type: Literal[ + "UNIQUENESS" + ] # TODO: add other constraint types ["propertyExistence", "propertyType", "key"] + node_type: str + property_name: str + + model_config = ConfigDict( + frozen=True, + ) + + class GraphSchema(DataModel): """This model represents the expected node and relationship types in the graph. @@ -177,6 +193,7 @@ class GraphSchema(DataModel): node_types: Tuple[NodeType, ...] relationship_types: Tuple[RelationshipType, ...] = tuple() patterns: Tuple[Tuple[str, str, str], ...] = tuple() + constraints: Tuple[ConstraintType, ...] = tuple() additional_node_types: bool = Field( default_factory=default_additional_item("node_types") @@ -239,6 +256,21 @@ def validate_additional_parameters(self) -> Self: ) return self + @model_validator(mode="after") + def validate_constraints_against_node_types(self) -> Self: + if not self.constraints: + return self + for constraint in self.constraints: + if not constraint.property_name: + raise SchemaValidationError( + f"Constraint has no property name: {constraint}. Property name is required." + ) + if constraint.node_type not in self._node_type_index: + raise SchemaValidationError( + f"Constraint references undefined node type: {constraint.node_type}" + ) + return self + def node_type_from_label(self, label: str) -> Optional[NodeType]: return self._node_type_index.get(label) @@ -382,6 +414,7 @@ def create_schema_model( node_types: Sequence[NodeType], relationship_types: Optional[Sequence[RelationshipType]] = None, patterns: Optional[Sequence[Tuple[str, str, str]]] = None, + constraints: Optional[Sequence[ConstraintType]] = None, **kwargs: Any, ) -> GraphSchema: """ @@ -403,6 +436,7 @@ def create_schema_model( node_types=node_types, relationship_types=relationship_types or (), patterns=patterns or (), + constraints=constraints or (), **kwargs, ) ) @@ -415,6 +449,7 @@ async def run( node_types: Sequence[NodeType], relationship_types: Optional[Sequence[RelationshipType]] = None, patterns: Optional[Sequence[Tuple[str, str, str]]] = None, + constraints: Optional[Sequence[ConstraintType]] = None, **kwargs: Any, ) -> GraphSchema: """ @@ -432,6 +467,7 @@ async def run( node_types, relationship_types, patterns, + constraints, **kwargs, ) @@ -555,6 +591,41 @@ def _filter_relationships_without_labels( relationship_types, "relationship type" ) + def _filter_invalid_constraints( + self, constraints: List[Dict[str, Any]], node_types: List[Dict[str, Any]] + ) -> List[Dict[str, Any]]: + """Filter out constraints that reference undefined node types or have no property name.""" + if not constraints: + return [] + + if not node_types: + logging.info( + "Filtering out all constraints because no node types are defined. " + "Constraints reference node types that must be defined." + ) + return [] + + valid_node_labels = {node_type.get("label") for node_type in node_types} + + filtered_constraints = [] + for constraint in constraints: + # check if the property_name is provided + if not constraint.get("property_name"): + logging.info( + f"Filtering out constraint: {constraint}. " + f"Property name is not provided." + ) + continue + # check if the node_type is valid + if constraint.get("node_type") not in valid_node_labels: + logging.info( + f"Filtering out constraint: {constraint}. " + f"Node type '{constraint.get('node_type')}' is not valid. Valid node types: {valid_node_labels}" + ) + continue + filtered_constraints.append(constraint) + return filtered_constraints + def _clean_json_content(self, content: str) -> str: content = content.strip() @@ -624,6 +695,9 @@ async def run(self, text: str, examples: str = "", **kwargs: Any) -> GraphSchema extracted_patterns: Optional[List[Tuple[str, str, str]]] = extracted_schema.get( "patterns" ) + extracted_constraints: Optional[List[Dict[str, Any]]] = extracted_schema.get( + "constraints" + ) # Filter out nodes and relationships without labels extracted_node_types = self._filter_nodes_without_labels(extracted_node_types) @@ -638,11 +712,18 @@ async def run(self, text: str, examples: str = "", **kwargs: Any) -> GraphSchema extracted_patterns, extracted_node_types, extracted_relationship_types ) + # Filter out invalid constraints + if extracted_constraints: + extracted_constraints = self._filter_invalid_constraints( + extracted_constraints, extracted_node_types + ) + return GraphSchema.model_validate( { "node_types": extracted_node_types, "relationship_types": extracted_relationship_types, "patterns": extracted_patterns, + "constraints": extracted_constraints or [], } ) diff --git a/src/neo4j_graphrag/generation/prompts.py b/src/neo4j_graphrag/generation/prompts.py index d9045a944..6e3b80e1d 100644 --- a/src/neo4j_graphrag/generation/prompts.py +++ b/src/neo4j_graphrag/generation/prompts.py @@ -217,7 +217,11 @@ class SchemaExtractionTemplate(PromptTemplate): 4. Include property definitions only when the type can be confidently inferred, otherwise omit them. 5. When defining patterns, ensure that every node label and relationship label mentioned exists in your lists of node types and relationship types. 6. Do not create node types that aren't clearly mentioned in the text. -7. Keep your schema minimal and focused on clearly identifiable patterns in the text. +7. For each node type, identify a unique identifier property and add it as a UNIQUENESS constraint to the list of constraints. +8. Constraints must reference a node_type label that exists in the list of node types. +9. Each constraint must have a property_name having a name that indicates it is a unique identifier for the node type (e.g., person_id for Person, company_id for Company) +10. Keep your schema minimal and focused on clearly identifiable patterns in the text. + Accepted property types are: BOOLEAN, DATE, DURATION, FLOAT, INTEGER, LIST, LOCAL_DATETIME, LOCAL_TIME, POINT, STRING, ZONED_DATETIME, ZONED_TIME. @@ -233,18 +237,26 @@ class SchemaExtractionTemplate(PromptTemplate): "type": "STRING" }} ] - }}, + }} ... ], "relationship_types": [ {{ "label": "WORKS_FOR" - }}, + }} ... ], "patterns": [ ["Person", "WORKS_FOR", "Company"], ... + ], + "constraints": [ + {{ + "type": "UNIQUENESS", + "node_type": "Person", + "property_name": "person_id" + }} + ... ] }} From 114072a5a02f8b52c9eb942c041bad365e6b4f9a Mon Sep 17 00:00:00 2001 From: AmirLayegh Date: Fri, 12 Dec 2025 12:52:42 +0100 Subject: [PATCH 2/7] feat: add tests for ConstraintType --- .../experimental/components/schema.py | 40 ++- src/neo4j_graphrag/generation/prompts.py | 12 +- .../experimental/components/test_schema.py | 311 ++++++++++++++++++ 3 files changed, 353 insertions(+), 10 deletions(-) diff --git a/src/neo4j_graphrag/experimental/components/schema.py b/src/neo4j_graphrag/experimental/components/schema.py index bb41f6de7..d2804b676 100644 --- a/src/neo4j_graphrag/experimental/components/schema.py +++ b/src/neo4j_graphrag/experimental/components/schema.py @@ -76,7 +76,7 @@ class PropertyType(BaseModel): ] description: str = "" required: bool = False - + # unique: bool = False model_config = ConfigDict( frozen=True, ) @@ -269,6 +269,16 @@ def validate_constraints_against_node_types(self) -> Self: raise SchemaValidationError( f"Constraint references undefined node type: {constraint.node_type}" ) + # Check if property_name exists on the node type (only if additional_properties is False) + node_type = self._node_type_index[constraint.node_type] + if not node_type.additional_properties: + valid_property_names = {p.name for p in node_type.properties} + if constraint.property_name not in valid_property_names: + raise SchemaValidationError( + f"Constraint references undefined property '{constraint.property_name}' " + f"on node type '{constraint.node_type}'. " + f"Valid properties: {valid_property_names}" + ) return self def node_type_from_label(self, label: str) -> Optional[NodeType]: @@ -594,7 +604,8 @@ def _filter_relationships_without_labels( def _filter_invalid_constraints( self, constraints: List[Dict[str, Any]], node_types: List[Dict[str, Any]] ) -> List[Dict[str, Any]]: - """Filter out constraints that reference undefined node types or have no property name.""" + """Filter out constraints that reference undefined node types, have no property name, + or reference a property that doesn't exist on the node type.""" if not constraints: return [] @@ -605,7 +616,16 @@ def _filter_invalid_constraints( ) return [] - valid_node_labels = {node_type.get("label") for node_type in node_types} + # Build a mapping of node_type label -> set of property names + node_type_properties: Dict[str, set[str]] = {} + for node_type_dict in node_types: + label = node_type_dict.get("label") + if label: + properties = node_type_dict.get("properties", []) + property_names = {p.get("name") for p in properties if p.get("name")} + node_type_properties[label] = property_names + + valid_node_labels = set(node_type_properties.keys()) filtered_constraints = [] for constraint in constraints: @@ -617,10 +637,20 @@ def _filter_invalid_constraints( ) continue # check if the node_type is valid - if constraint.get("node_type") not in valid_node_labels: + node_type = constraint.get("node_type") + if node_type not in valid_node_labels: + logging.info( + f"Filtering out constraint: {constraint}. " + f"Node type '{node_type}' is not valid. Valid node types: {valid_node_labels}" + ) + continue + # check if the property_name exists on the node type + property_name = constraint.get("property_name") + if property_name not in node_type_properties.get(node_type, set()): logging.info( f"Filtering out constraint: {constraint}. " - f"Node type '{constraint.get('node_type')}' is not valid. Valid node types: {valid_node_labels}" + f"Property '{property_name}' does not exist on node type '{node_type}'. " + f"Valid properties: {node_type_properties.get(node_type, set())}" ) continue filtered_constraints.append(constraint) diff --git a/src/neo4j_graphrag/generation/prompts.py b/src/neo4j_graphrag/generation/prompts.py index 6e3b80e1d..6fedb511b 100644 --- a/src/neo4j_graphrag/generation/prompts.py +++ b/src/neo4j_graphrag/generation/prompts.py @@ -217,10 +217,12 @@ class SchemaExtractionTemplate(PromptTemplate): 4. Include property definitions only when the type can be confidently inferred, otherwise omit them. 5. When defining patterns, ensure that every node label and relationship label mentioned exists in your lists of node types and relationship types. 6. Do not create node types that aren't clearly mentioned in the text. -7. For each node type, identify a unique identifier property and add it as a UNIQUENESS constraint to the list of constraints. -8. Constraints must reference a node_type label that exists in the list of node types. -9. Each constraint must have a property_name having a name that indicates it is a unique identifier for the node type (e.g., person_id for Person, company_id for Company) -10. Keep your schema minimal and focused on clearly identifiable patterns in the text. +7. Keep your schema minimal and focused on clearly identifiable patterns in the text. +8. UNIQUENESS CONSTRAINTS: +8.1 UNIQUENESS is optional; each node_type may or may not have exactly one uniqueness constraint. +8.2 Only use properties that seem to not have too many missing values in the sample. +8.3 Constraints reference node_types by label and specify which property is unique. +8.4 If a property appears in a uniqueness constraint it MUST also appear in the corresponding node_type as a property. Accepted property types are: BOOLEAN, DATE, DURATION, FLOAT, INTEGER, LIST, @@ -254,7 +256,7 @@ class SchemaExtractionTemplate(PromptTemplate): {{ "type": "UNIQUENESS", "node_type": "Person", - "property_name": "person_id" + "property_name": "name" }} ... ] diff --git a/tests/unit/experimental/components/test_schema.py b/tests/unit/experimental/components/test_schema.py index 9ff440557..1d7e2cc58 100644 --- a/tests/unit/experimental/components/test_schema.py +++ b/tests/unit/experimental/components/test_schema.py @@ -27,6 +27,7 @@ NodeType, PropertyType, RelationshipType, + ConstraintType, SchemaFromTextExtractor, GraphSchema, SchemaFromExistingGraphExtractor, @@ -119,6 +120,30 @@ def test_relationship_type_additional_properties_default() -> None: assert relationship_type.additional_properties is True +def test_constraint_type_initialization() -> None: + constraint = ConstraintType( + type="UNIQUENESS", node_type="Person", property_name="name" + ) + assert constraint.type == "UNIQUENESS" + assert constraint.node_type == "Person" + assert constraint.property_name == "name" + + +def test_constraint_type_is_frozen() -> None: + constraint = ConstraintType( + type="UNIQUENESS", node_type="Person", property_name="name" + ) + + with pytest.raises(ValidationError): + constraint.type = "UNIQUENESS" + + with pytest.raises(ValidationError): + constraint.node_type = "Organization" + + with pytest.raises(ValidationError): + constraint.property_name = "id" + + def test_schema_additional_node_types_default() -> None: schema_dict: dict[str, Any] = { "node_types": [], @@ -200,6 +225,61 @@ def test_schema_additional_parameter_validation() -> None: GraphSchema.model_validate(schema_dict) +def test_schema_with_valid_constraints() -> None: + schema_dict: dict[str, Any] = { + "node_types": [ + {"label": "Person", "properties": [{"name": "name", "type": "STRING"}]} + ], + "constraints": [ + {"type": "UNIQUENESS", "node_type": "Person", "property_name": "name"} + ], + } + schema = GraphSchema.model_validate(schema_dict) + + assert len(schema.constraints) == 1 + assert schema.constraints[0].type == "UNIQUENESS" + assert schema.constraints[0].node_type == "Person" + assert schema.constraints[0].property_name == "name" + + +def test_schema_constraint_validation_invalid_node_type() -> None: + schema_dict: dict[str, Any] = { + "node_types": [ + {"label": "Person", "properties": [{"name": "name", "type": "STRING"}]} + ], + "constraints": [ + { + "type": "UNIQUENESS", + "node_type": "NonExistentNode", + "property_name": "id", + } + ], + } + + with pytest.raises(SchemaValidationError) as exc_info: + GraphSchema.model_validate(schema_dict) + + assert "Constraint references undefined node type: NonExistentNode" in str( + exc_info.value + ) + + +def test_schema_constraint_validation_missing_property_name() -> None: + schema_dict: dict[str, Any] = { + "node_types": [ + {"label": "Person", "properties": [{"name": "name", "type": "STRING"}]} + ], + "constraints": [ + {"type": "UNIQUENESS", "node_type": "Person", "property_name": ""} + ], + } + + with pytest.raises(SchemaValidationError) as exc_info: + GraphSchema.model_validate(schema_dict) + + assert "Constraint has no property name" in str(exc_info.value) + + @pytest.fixture def valid_node_types() -> tuple[NodeType, ...]: return ( @@ -258,6 +338,13 @@ def patterns_with_invalid_entity() -> tuple[tuple[str, str, str], ...]: ) +@pytest.fixture +def valid_constraints() -> tuple[ConstraintType, ...]: + return ( + ConstraintType(type="UNIQUENESS", node_type="PERSON", property_name="name"), + ) + + @pytest.fixture def patterns_with_invalid_relation() -> tuple[tuple[str, str, str], ...]: return (("PERSON", "NON_EXISTENT_RELATION", "ORGANIZATION"),) @@ -298,6 +385,24 @@ def test_create_schema_model_valid_data( assert schema.additional_patterns is False +def test_create_schema_model_with_constraints( + schema_builder: SchemaBuilder, + valid_node_types: Tuple[NodeType, ...], + valid_constraints: Tuple[ConstraintType, ...], +) -> None: + schema = schema_builder.create_schema_model( + list(valid_node_types), + constraints=list(valid_constraints), + ) + + assert schema.node_types == valid_node_types + assert schema.constraints == valid_constraints + assert len(schema.constraints) == 1 + assert schema.constraints[0].type == "UNIQUENESS" + assert schema.constraints[0].node_type == "PERSON" + assert schema.constraints[0].property_name == "name" + + @pytest.mark.asyncio async def test_run_method( schema_builder: SchemaBuilder, @@ -326,6 +431,25 @@ async def test_run_method( assert schema.additional_patterns is False +@pytest.mark.asyncio +async def test_run_method_with_constraints( + schema_builder: SchemaBuilder, + valid_node_types: Tuple[NodeType, ...], + valid_constraints: Tuple[ConstraintType, ...], +) -> None: + schema = await schema_builder.run( + list(valid_node_types), + constraints=list(valid_constraints), + ) + + assert schema.node_types == valid_node_types + assert schema.constraints == valid_constraints + assert len(schema.constraints) == 1 + assert schema.constraints[0].type == "UNIQUENESS" + assert schema.constraints[0].node_type == "PERSON" + assert schema.constraints[0].property_name == "name" + + def test_create_schema_model_invalid_entity( schema_builder: SchemaBuilder, valid_node_types: Tuple[NodeType, ...], @@ -452,6 +576,116 @@ def valid_schema_json() -> str: """ +@pytest.fixture +def schema_json_with_valid_constraints() -> str: + return """ + { + "node_types": [ + { + "label": "Person", + "properties": [ + {"name": "name", "type": "STRING"}, + {"name": "email", "type": "STRING"} + ] + }, + { + "label": "Organization", + "properties": [ + {"name": "name", "type": "STRING"} + ] + } + ], + "relationship_types": [ + { + "label": "WORKS_FOR", + "properties": [ + {"name": "since", "type": "DATE"} + ] + } + ], + "patterns": [ + ["Person", "WORKS_FOR", "Organization"] + ], + "constraints": [ + {"type": "UNIQUENESS", "node_type": "Person", "property_name": "name"} + ] + } + """ + + +@pytest.fixture +def schema_json_with_invalid_constraints() -> str: + return """ + { + "node_types": [ + { + "label": "Person", + "properties": [ + {"name": "name", "type": "STRING"} + ] + }, + { + "label": "Organization", + "properties": [ + {"name": "name", "type": "STRING"} + ] + } + ], + "relationship_types": [ + { + "label": "WORKS_FOR", + "properties": [ + {"name": "since", "type": "DATE"} + ] + } + ], + "patterns": [ + ["Person", "WORKS_FOR", "Organization"] + ], + "constraints": [ + {"type": "UNIQUENESS", "node_type": "Person", "property_name": "name"}, + {"type": "UNIQUENESS", "node_type": "Person", "property_name": "email"}, + {"type": "UNIQUENESS", "node_type": "NonExistentNode", "property_name": "id"}, + {"type": "UNIQUENESS", "node_type": "Person", "property_name": ""} + ] + } + """ + + +@pytest.fixture +def schema_json_with_null_constraints() -> str: + return """ + { + "node_types": [ + { + "label": "Person", + "properties": [ + {"name": "name", "type": "STRING"} + ] + }, + { + "label": "Organization", + "properties": [ + {"name": "name", "type": "STRING"} + ] + } + ], + "relationship_types": [ + { + "label": "WORKS_FOR", + "properties": [ + {"name": "since", "type": "DATE"} + ] + } + ], + "patterns": [ + ["Person", "WORKS_FOR", "Organization"] + ], + "constraints": null + } + """ + + @pytest.fixture def invalid_schema_json() -> str: return """ @@ -960,6 +1194,83 @@ async def test_schema_from_text_filters_relationships_without_labels( assert ("Person", "MANAGES", "Organization") in schema.patterns +@pytest.mark.asyncio +async def test_schema_from_text_with_valid_constraints( + schema_from_text: SchemaFromTextExtractor, + mock_llm: AsyncMock, + schema_json_with_valid_constraints: str, +) -> None: + # configure the mock LLM to return schema with valid constraints + mock_llm.ainvoke.return_value = LLMResponse( + content=schema_json_with_valid_constraints + ) + + # run the schema extraction + schema = await schema_from_text.run(text="Sample text for extraction") + + assert len(schema.constraints) == 1 + assert schema.constraints[0].type == "UNIQUENESS" + assert schema.constraints[0].node_type == "Person" + assert schema.constraints[0].property_name == "name" + + +@pytest.mark.asyncio +async def test_schema_from_text_filters_invalid_constraints( + schema_from_text: SchemaFromTextExtractor, + mock_llm: AsyncMock, + schema_json_with_invalid_constraints: str, +) -> None: + # configure the mock LLM to return schema with invalid constraints + mock_llm.ainvoke.return_value = LLMResponse( + content=schema_json_with_invalid_constraints + ) + + # run the schema extraction + schema = await schema_from_text.run(text="Sample text for extraction") + + # verify that invalid constraints were filtered out: + # constraints with NonExistentNode should be removed + # constraint with empty property_name should be removed + # only the valid constraint should remain + assert len(schema.constraints) == 1 + assert schema.constraints[0].node_type == "Person" + assert schema.constraints[0].property_name == "name" + + +@pytest.mark.asyncio +async def test_schema_from_text_handles_null_constraints( + schema_from_text: SchemaFromTextExtractor, + mock_llm: AsyncMock, + schema_json_with_null_constraints: str, +) -> None: + # configure the mock LLM to return schema with null constraints + mock_llm.ainvoke.return_value = LLMResponse( + content=schema_json_with_null_constraints + ) + + # run the schema extraction - should not crash + schema = await schema_from_text.run(text="Sample text for extraction") + + # verify schema was created with empty constraints + assert len(schema.constraints) == 0 + + +@pytest.mark.asyncio +async def test_schema_from_text_handles_missing_constraints( + schema_from_text: SchemaFromTextExtractor, + mock_llm: AsyncMock, + valid_schema_json: str, +) -> None: + # configure the mock LLM to return schema without constraints field + mock_llm.ainvoke.return_value = LLMResponse(content=valid_schema_json) + + # run the schema extraction - should not crash + schema = await schema_from_text.run(text="Sample text for extraction") + + # verify schema was created with empty constraints + assert len(schema.constraints) == 0 + + def test_clean_json_content_markdown_with_json_language( schema_from_text: SchemaFromTextExtractor, ) -> None: From cd6aecbf8f08d1608e341e9cfad97a343e6b20c0 Mon Sep 17 00:00:00 2001 From: AmirLayegh Date: Fri, 12 Dec 2025 16:54:00 +0100 Subject: [PATCH 3/7] feat: add tests for ConstraintType --- .../experimental/components/test_schema.py | 84 +++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/tests/unit/experimental/components/test_schema.py b/tests/unit/experimental/components/test_schema.py index 1d7e2cc58..7e426ca93 100644 --- a/tests/unit/experimental/components/test_schema.py +++ b/tests/unit/experimental/components/test_schema.py @@ -225,6 +225,48 @@ def test_schema_additional_parameter_validation() -> None: GraphSchema.model_validate(schema_dict) +def test_schema_constraint_validation_property_not_in_node_type() -> None: + schema_dict: dict[str, Any] = { + "node_types": [ + { + "label": "Person", + "properties": [{"name": "name", "type": "STRING"}], + } + ], + "constraints": [ + {"type": "UNIQUENESS", "node_type": "Person", "property_name": "email"} + ] + } + + with pytest.raises(SchemaValidationError) as exc_info: + GraphSchema.model_validate(schema_dict) + + assert "Constraint references undefined property" in str(exc_info.value) + assert "on node type 'Person'" in str(exc_info.value) + + +def test_schema_constraint_with_additional_properties_allows_unknown_property() -> None: + # if additional_properties is True, we can define constraints that are not in the node_type + schema_dict: dict[str, Any] = { + "node_types": [ + { + "label": "Person", + "properties": [{"name": "name", "type": "STRING"}], + "additional_properties": True, + } + ], + "constraints": [ + {"type": "UNIQUENESS", "node_type": "Person", "property_name": "email"} + ], + } + + # Should NOT raise - email is allowed because additional_properties=True + schema = GraphSchema.model_validate(schema_dict) + + assert len(schema.constraints) == 1 + assert schema.constraints[0].property_name == "email" + + def test_schema_with_valid_constraints() -> None: schema_dict: dict[str, Any] = { "node_types": [ @@ -1100,6 +1142,28 @@ def schema_json_with_relationships_without_labels() -> str: """ +@pytest.fixture +def schema_json_with_nonexistent_property_constraint() -> str: + return """ + { + "node_types": [ + { + "label": "Person", + "properties": [ + {"name": "name", "type": "STRING"} + ] + } + ], + "relationship_types": [], + "patterns": [], + "constraints": [ + {"type": "UNIQUENESS", "node_type": "Person", "property_name": "name"}, + {"type": "UNIQUENESS", "node_type": "Person", "property_name": "nonexistent_property"} + ] + } + """ + + @pytest.mark.asyncio async def test_schema_from_text_filters_invalid_node_patterns( schema_from_text: SchemaFromTextExtractor, @@ -1237,6 +1301,26 @@ async def test_schema_from_text_filters_invalid_constraints( assert schema.constraints[0].property_name == "name" +@pytest.mark.asyncio +async def test_schema_from_text_filters_constraint_with_nonexistent_property( + schema_from_text: SchemaFromTextExtractor, + mock_llm: AsyncMock, + schema_json_with_nonexistent_property_constraint: str, +) -> None: + # configure the mock LLM to return schema with constraint on nonexistent property + mock_llm.ainvoke.return_value = LLMResponse( + content=schema_json_with_nonexistent_property_constraint + ) + + # run the schema extraction + schema = await schema_from_text.run(text="Sample text for extraction") + + # verify that only the valid constraint (with "name" property) remains + # the constraint with "nonexistent_property" should be filtered out + assert len(schema.constraints) == 1 + assert schema.constraints[0].property_name == "name" + + @pytest.mark.asyncio async def test_schema_from_text_handles_null_constraints( schema_from_text: SchemaFromTextExtractor, From 52e62896049095b227f5d3dc089a10e15fd880b1 Mon Sep 17 00:00:00 2001 From: AmirLayegh Date: Fri, 12 Dec 2025 17:00:20 +0100 Subject: [PATCH 4/7] feat: add tests for ConstraintType --- tests/unit/experimental/components/test_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/experimental/components/test_schema.py b/tests/unit/experimental/components/test_schema.py index 7e426ca93..31477977f 100644 --- a/tests/unit/experimental/components/test_schema.py +++ b/tests/unit/experimental/components/test_schema.py @@ -235,7 +235,7 @@ def test_schema_constraint_validation_property_not_in_node_type() -> None: ], "constraints": [ {"type": "UNIQUENESS", "node_type": "Person", "property_name": "email"} - ] + ], } with pytest.raises(SchemaValidationError) as exc_info: From 090c760dcf27b9c4ed3c54743f919ff77c58a316 Mon Sep 17 00:00:00 2001 From: AmirLayegh Date: Tue, 16 Dec 2025 14:57:35 +0100 Subject: [PATCH 5/7] constraints type check --- .../experimental/components/schema.py | 31 +++++++++++++------ 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/src/neo4j_graphrag/experimental/components/schema.py b/src/neo4j_graphrag/experimental/components/schema.py index d2804b676..0d20c8ee6 100644 --- a/src/neo4j_graphrag/experimental/components/schema.py +++ b/src/neo4j_graphrag/experimental/components/schema.py @@ -261,6 +261,10 @@ def validate_constraints_against_node_types(self) -> Self: if not self.constraints: return self for constraint in self.constraints: + # Only validate UNIQUENESS constraints (other types will be added) + if constraint.type != "UNIQUENESS": + continue + if not constraint.property_name: raise SchemaValidationError( f"Constraint has no property name: {constraint}. Property name is required." @@ -269,16 +273,15 @@ def validate_constraints_against_node_types(self) -> Self: raise SchemaValidationError( f"Constraint references undefined node type: {constraint.node_type}" ) - # Check if property_name exists on the node type (only if additional_properties is False) + # Check if property_name exists on the node type node_type = self._node_type_index[constraint.node_type] - if not node_type.additional_properties: - valid_property_names = {p.name for p in node_type.properties} - if constraint.property_name not in valid_property_names: - raise SchemaValidationError( - f"Constraint references undefined property '{constraint.property_name}' " - f"on node type '{constraint.node_type}'. " - f"Valid properties: {valid_property_names}" - ) + valid_property_names = {p.name for p in node_type.properties} + if constraint.property_name not in valid_property_names: + raise SchemaValidationError( + f"Constraint references undefined property '{constraint.property_name}' " + f"on node type '{constraint.node_type}'. " + f"Valid properties: {valid_property_names}" + ) return self def node_type_from_label(self, label: str) -> Optional[NodeType]: @@ -604,7 +607,7 @@ def _filter_relationships_without_labels( def _filter_invalid_constraints( self, constraints: List[Dict[str, Any]], node_types: List[Dict[str, Any]] ) -> List[Dict[str, Any]]: - """Filter out constraints that reference undefined node types, have no property name, + """Filter out constraints that reference undefined node types, have no property name, are not UNIQUENESS type or reference a property that doesn't exist on the node type.""" if not constraints: return [] @@ -629,6 +632,14 @@ def _filter_invalid_constraints( filtered_constraints = [] for constraint in constraints: + # Only process UNIQUENESS constraints (other types will be added) + if constraint.get("type") != "UNIQUENESS": + logging.info( + f"Filtering out constraint: {constraint}. " + f"Only UNIQUENESS constraints are supported." + ) + continue + # check if the property_name is provided if not constraint.get("property_name"): logging.info( From 51398b7c650cb9b4bde8530b14ed6d66a8a740f9 Mon Sep 17 00:00:00 2001 From: AmirLayegh Date: Tue, 16 Dec 2025 15:17:34 +0100 Subject: [PATCH 6/7] constraints type check --- tests/unit/experimental/components/test_schema.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/unit/experimental/components/test_schema.py b/tests/unit/experimental/components/test_schema.py index 31477977f..98bb3fe58 100644 --- a/tests/unit/experimental/components/test_schema.py +++ b/tests/unit/experimental/components/test_schema.py @@ -245,7 +245,9 @@ def test_schema_constraint_validation_property_not_in_node_type() -> None: assert "on node type 'Person'" in str(exc_info.value) -def test_schema_constraint_with_additional_properties_allows_unknown_property() -> None: +def test_schema_constraint_with_additional_properties_with_allows_unknown_property() -> ( + None +): # if additional_properties is True, we can define constraints that are not in the node_type schema_dict: dict[str, Any] = { "node_types": [ @@ -260,11 +262,11 @@ def test_schema_constraint_with_additional_properties_allows_unknown_property() ], } - # Should NOT raise - email is allowed because additional_properties=True - schema = GraphSchema.model_validate(schema_dict) + # Should raise - email is not allowed because the property is not defined in the node + with pytest.raises(SchemaValidationError) as exc_info: + GraphSchema.model_validate(schema_dict) - assert len(schema.constraints) == 1 - assert schema.constraints[0].property_name == "email" + assert "Constraint references undefined property 'email'" in str(exc_info.value) def test_schema_with_valid_constraints() -> None: From aca02f0cc450c074e5885894db598892f0ef50f8 Mon Sep 17 00:00:00 2001 From: AmirLayegh Date: Tue, 16 Dec 2025 20:34:49 +0100 Subject: [PATCH 7/7] constraints type check --- src/neo4j_graphrag/experimental/components/schema.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/neo4j_graphrag/experimental/components/schema.py b/src/neo4j_graphrag/experimental/components/schema.py index 0d20c8ee6..d7c45694d 100644 --- a/src/neo4j_graphrag/experimental/components/schema.py +++ b/src/neo4j_graphrag/experimental/components/schema.py @@ -76,7 +76,6 @@ class PropertyType(BaseModel): ] description: str = "" required: bool = False - # unique: bool = False model_config = ConfigDict( frozen=True, )