diff --git a/src/oaklib/implementations/simpleobo/simple_obo_implementation.py b/src/oaklib/implementations/simpleobo/simple_obo_implementation.py index 3e0d05d1f..bcee7b2be 100644 --- a/src/oaklib/implementations/simpleobo/simple_obo_implementation.py +++ b/src/oaklib/implementations/simpleobo/simple_obo_implementation.py @@ -907,14 +907,14 @@ def _tv_dict(stanza: Stanza) -> Dict[str, List[str]]: elif tag == TAG_SUBSET: if node_is_deleted: continue - subsets1 = stanza1.simple_values(TAG_SUBSET) - subsets2 = stanza2.simple_values(TAG_SUBSET) - for subset in subsets1: - if subset not in subsets2: - yield kgcl.RemoveNodeFromSubset(id=_id(), about_node=t1id, in_subset=subset) - for subset in subsets2: - if subset not in subsets1: - yield kgcl.AddNodeToSubset(id=_id(), about_node=t2id, in_subset=subset) + xrefs1 = stanza1.simple_values(TAG_SUBSET) + xrefs2 = stanza2.simple_values(TAG_SUBSET) + for xref in xrefs1: + if xref not in xrefs2: + yield kgcl.RemoveNodeFromSubset(id=_id(), about_node=t1id, in_subset=xref) + for xref in xrefs2: + if xref not in xrefs1: + yield kgcl.AddNodeToSubset(id=_id(), about_node=t2id, in_subset=xref) elif tag == TAG_IS_A: isas1 = stanza1.simple_values(TAG_IS_A) isas2 = stanza2.simple_values(TAG_IS_A) @@ -950,6 +950,21 @@ def _tv_dict(stanza: Stanza) -> Dict[str, List[str]]: yield kgcl.NewSynonym( id=_id(), about_node=t2id, new_value=syn[0], predicate=pred ) + elif tag == TAG_XREF: + if node_is_deleted: + continue + xrefs1 = stanza1.simple_values(TAG_XREF) + xrefs2 = stanza2.simple_values(TAG_XREF) + for xref in xrefs1: + if xref not in xrefs2: + yield kgcl.RemoveMapping( + id=_id(), about_node=t1id, object=xref, predicate=HAS_DBXREF + ) + for xref in xrefs2: + if xref not in xrefs1: + yield kgcl.MappingCreation( + id=_id(), subject=t2id, object=xref, predicate=HAS_DBXREF + ) def different_from(self, entity: CURIE, other_ontology: DifferInterface) -> bool: t1 = self._stanza(entity, strict=False) diff --git a/src/oaklib/interfaces/differ_interface.py b/src/oaklib/interfaces/differ_interface.py index 36e939922..31665f02b 100644 --- a/src/oaklib/interfaces/differ_interface.py +++ b/src/oaklib/interfaces/differ_interface.py @@ -11,9 +11,10 @@ Change, ClassCreation, Edge, - EdgeChange, - EdgeCreation, EdgeDeletion, + MappingCreation, + # MappingReplacement, + MappingPredicateChange, NewSynonym, NewTextDefinition, NodeCreation, @@ -26,6 +27,7 @@ NodeTextDefinitionChange, NodeUnobsoletion, PredicateChange, + RemoveMapping, RemoveNodeFromSubset, RemoveSynonym, RemoveTextDefinition, @@ -275,34 +277,35 @@ def diff( if mappings_added_set: for mapping in mappings_added_set: predicate, xref = mapping - edge_created = EdgeCreation( + mapping_created = MappingCreation( id=_gen_id(), subject=entity, predicate=predicate, object=xref, ) - yield edge_created + yield mapping_created if mappings_removed_set: for mapping in mappings_removed_set: predicate, xref = mapping - deleted_edge = EdgeDeletion( + deleted_mapping = RemoveMapping( id=_gen_id(), - subject=entity, + about_node=entity, predicate=predicate, object=xref, ) - yield deleted_edge + yield deleted_mapping if mapping_changed_set: for changes in mapping_changed_set: object, new_predicate, old_predicate = changes - edge_change = EdgeChange( + mapping_change = MappingPredicateChange( id=_gen_id(), - about_edge=Edge(subject=entity, predicate=old_predicate, object=object), + about_node=entity, + object=object, old_value=old_predicate, new_value=new_predicate, ) - yield edge_change + yield mapping_change # ! Subset changes self_subsets = set(self.terms_subsets([entity])) diff --git a/src/oaklib/utilities/writers/change_handler.py b/src/oaklib/utilities/writers/change_handler.py index e959c2867..3d49ecc38 100644 --- a/src/oaklib/utilities/writers/change_handler.py +++ b/src/oaklib/utilities/writers/change_handler.py @@ -25,15 +25,17 @@ def write_markdown_table(self, title: str, header: str, rows: List[str]) -> None def handle_generic_change( self, value: List[object], title: str, header: str, row_format: str ) -> None: - rows = [row_format.format(change=change) for change in value] + rows = list({row_format.format(change=change) for change in value}) self.write_markdown_table(f"{title}: {len(rows)}", header, rows) def handle_new_synonym(self, value): # Create rows for the table - rows = [ - f"| {self._format_entity_labels(obj.about_node)} | {obj.new_value} | {obj.predicate} |" - for obj in value - ] + rows = list( + { + f"| {self._format_entity_labels(obj.about_node)} | {obj.new_value} | {obj.predicate} |" + for obj in value + } + ) # Define the header for the table header = "| Term | New Synonym | Predicate |" @@ -43,11 +45,13 @@ def handle_new_synonym(self, value): def handle_edge_deletion(self, value): # Create rows for the table - rows = [ - f"| {self._format_entity_labels(change.subject)} | {self._format_entity_labels(change.predicate)} |\ + rows = list( + { + f"| {self._format_entity_labels(change.subject)} | {self._format_entity_labels(change.predicate)} |\ {self._format_entity_labels(change.object)} |" - for change in value - ] + for change in value + } + ) # Define the header for the table header = "| Subject| Predicate| Object|" @@ -57,11 +61,13 @@ def handle_edge_deletion(self, value): def handle_edge_creation(self, value): # Create rows for the table - rows = [ - f"| {self._format_entity_labels(change.subject)} | {self._format_entity_labels(change.predicate)} |\ + rows = list( + { + f"| {self._format_entity_labels(change.subject)} | {self._format_entity_labels(change.predicate)} |\ {self._format_entity_labels(change.object)} |" - for change in value - ] + for change in value + } + ) # Define the header for the table header = "| Subject | Predicate | Object|" @@ -71,12 +77,14 @@ def handle_edge_creation(self, value): def handle_edge_change(self, value): # Create rows for the table - rows = [ - f"| {self._format_entity_labels(change.about_edge.subject)} | \ + rows = list( + { + f"| {self._format_entity_labels(change.about_edge.subject)} | \ {self._format_entity_labels(change.about_edge.predicate)} | \ {self._format_entity_labels(change.old_value)} | {self._format_entity_labels(change.new_value)} |" - for change in value - ] + for change in value + } + ) # Define the header for the table header = "| Subject | Predicate | Old Object | New Object |" @@ -86,12 +94,14 @@ def handle_edge_change(self, value): def handle_node_move(self, value): # Create rows for the table - rows = [ - f"| {self._format_entity_labels(change.about_edge.subject)} | \ + rows = list( + { + f"| {self._format_entity_labels(change.about_edge.subject)} | \ {self._format_entity_labels(change.about_edge.predicate)} |\ {self._format_entity_labels(change.about_edge.object)} |" - for change in value - ] + for change in value + } + ) # Define the header for the table header = "| Subject | Predicate | Object |" @@ -101,13 +111,15 @@ def handle_node_move(self, value): def handle_predicate_change(self, value): # Create rows for the table - rows = [ - f"| {self._format_entity_labels(change.about_edge.subject)} | \ + rows = list( + { + f"| {self._format_entity_labels(change.about_edge.subject)} | \ {self._format_entity_labels(change.old_value)} |\ {self._format_entity_labels(change.new_value)} | \ {self._format_entity_labels(change.about_edge.object)} |" - for change in value - ] + for change in value + } + ) # Define the header for the table header = "| Subject | Old Predicate | New Predicate | Object |" @@ -117,9 +129,12 @@ def handle_predicate_change(self, value): def handle_node_rename(self, value): # Create rows for the table - rows = [ - f"| {change.about_node} | {change.old_value} | {change.new_value} |" for change in value - ] + rows = list( + { + f"| {change.about_node} | {change.old_value} | {change.new_value} |" + for change in value + } + ) # Define the header for the table header = "| ID | Old Label | New Label |" @@ -129,10 +144,12 @@ def handle_node_rename(self, value): def handle_remove_synonym(self, value): # Create rows for the table - rows = [ - f"| {self._format_entity_labels(change.about_node)} | {change.old_value} |" - for change in value - ] + rows = list( + { + f"| {self._format_entity_labels(change.about_node)} | {change.old_value} |" + for change in value + } + ) # Define the header for the table header = "| Term | Removed Synonym |" @@ -142,11 +159,13 @@ def handle_remove_synonym(self, value): def hand_synonym_predicate_change(self, value): # Create rows for the table - rows = [ - f"| {self._format_entity_labels(change.about_node)} | {change.old_value} |\ + rows = list( + { + f"| {self._format_entity_labels(change.about_node)} | {change.old_value} |\ {change.new_value} | {change.target} |" - for change in value - ] + for change in value + } + ) # Define the header for the table header = "| Term | Old Predicate | New Predicate | Synonym |" @@ -156,11 +175,13 @@ def hand_synonym_predicate_change(self, value): def handle_node_text_definition_change(self, value): # Create rows for the table - rows = [ - f"| {self._format_entity_labels(change.about_node)} | {change.old_value} |\ + rows = list( + { + f"| {self._format_entity_labels(change.about_node)} | {change.old_value} |\ {change.new_value} |" - for change in value - ] + for change in value + } + ) # Define the header for the table header = "| Term | Old Text Definition | New Text Definition |" @@ -170,11 +191,13 @@ def handle_node_text_definition_change(self, value): def handle_node_text_definition(self, value): # Create rows for the table - rows = [ - f"| {self._format_entity_labels(change.about_node)} | {change.old_value} |\ + rows = list( + { + f"| {self._format_entity_labels(change.about_node)} | {change.old_value} |\ {change.new_value} |" - for change in value - ] + for change in value + } + ) # Define the header for the table header = "| Term | Old Text Definition | New Text Definition |" @@ -184,7 +207,7 @@ def handle_node_text_definition(self, value): def handle_node_unobsoletion(self, value): # Create rows for the table - rows = [f"| {self._format_entity_labels(change.about_node)} |" for change in value] + rows = list({f"| {self._format_entity_labels(change.about_node)} |" for change in value}) # Define the header for the table header = "| Term |" @@ -194,7 +217,7 @@ def handle_node_unobsoletion(self, value): def handle_node_creation(self, value): # Create rows for the table - rows = [f"| {self._format_entity_labels(change.about_node)} |" for change in value] + rows = list({f"| {self._format_entity_labels(change.about_node)} |" for change in value}) # Define the header for the table header = "| Term |" @@ -204,7 +227,7 @@ def handle_node_creation(self, value): def handle_class_creation(self, value): # Create rows for the table - rows = [f"| {self._format_entity_labels(change.about_node)} |" for change in value] + rows = list({f"| {self._format_entity_labels(change.about_node)} |" for change in value}) # Define the header for the table header = "| Term |" @@ -214,7 +237,7 @@ def handle_class_creation(self, value): def handle_node_deletion(self, value): # Create rows for the table - rows = [f"| {self._format_entity_labels(change.about_node)} |" for change in value] + rows = list({f"| {self._format_entity_labels(change.about_node)} |" for change in value}) # Define the header for the table header = "| Term |" @@ -224,28 +247,34 @@ def handle_node_deletion(self, value): def handle_new_text_definition(self, value): # Create rows for the table - rows = [ - f"| {self._format_entity_labels(change.about_node)} | {change.new_value} |" - for change in value - ] + rows = list( + { + f"| {self._format_entity_labels(change.about_node)} | {change.new_value} |" + for change in value + } + ) header = "| Term | New Text Definition |" self.write_markdown_table(f"Text definitions added: {len(rows)}", header, rows) def handle_remove_text_definition(self, value): # Create rows for the table - rows = [ - f"| {self._format_entity_labels(change.about_node)} | {change.old_value} |" - for change in value - ] + rows = list( + { + f"| {self._format_entity_labels(change.about_node)} | {change.old_value} |" + for change in value + } + ) header = "| Term | Removed Text Definition |" self.write_markdown_table(f"Text definitions removed: {len(rows)}", header, rows) def handle_node_obsoletion_with_direct_replacement(self, value): - rows = [ - f"| {self._format_entity_labels(change.about_node)} |\ + rows = list( + { + f"| {self._format_entity_labels(change.about_node)} |\ {self._format_entity_labels(change.has_direct_replacement)} |" - for change in value - ] + for change in value + } + ) header = "| Term | Replacement |" self.write_markdown_table(f"Nodes obsoleted with replacement: {len(rows)}", header, rows) @@ -255,22 +284,66 @@ def handle_node_obsoletion(self, value): self.write_markdown_table(f"Nodes obsoleted without replacement: {len(rows)}", header, rows) def handle_node_direct_merge(self, value): - rows = [ - f"| {self._format_entity_labels(change.about_node)} |\ + rows = list( + { + f"| {self._format_entity_labels(change.about_node)} |\ {self._format_entity_labels(change.has_direct_replacement)} |" - for change in value - ] + for change in value + } + ) header = "| Term | Replacement |" self.write_markdown_table(f"Nodes merged: {len(rows)}", header, rows) def handle_add_node_to_subset(self, value): - rows = [ - f"| {self._format_entity_labels(change.about_node)} | {change.in_subset} |" - for change in value - ] + rows = list( + { + f"| {self._format_entity_labels(change.about_node)} | {change.in_subset} |" + for change in value + } + ) header = "| Term | Subset |" self.write_markdown_table(f"Nodes added to subset: {len(rows)}", header, rows) + def handle_mapping_creation(self, value): + rows = list( + { + f"""| {self._format_entity_labels(change.subject)} + | {change.predicate} | {self._format_entity_labels(change.object)} |""" + for change in value + } + ) + header = "| Subject | Predicate | Object |" + self.write_markdown_table(f"Mappings added: {len(rows)}", header, rows) + + def handle_mapping_predicate_change(self, value): + # Create rows for the table + rows = list( + { + f"| {self._format_entity_labels(change.about_node)} | \ + {self._format_entity_labels(change.old_value)} |\ + {self._format_entity_labels(change.new_value)} | \ + {self._format_entity_labels(change.object)} |" + for change in value + } + ) + + # Define the header for the table + header = "| Subject | Old Mapping Predicate | New Mapping Predicate | Object |" + + # Write the "Predicate Changed" section as a collapsible markdown table + self.write_markdown_table(f"Mappings changed: {len(rows)}", header, rows) + + def handle_remove_mapping(self, value): + rows = list( + { + f"""| {self._format_entity_labels(change.about_node)} + | {change.predicate} | {self._format_entity_labels(change.object)} |""" + for change in value + } + ) + header = "| Subject | Predicate | Object |" + self.write_markdown_table(f"Mappings removed: {len(rows)}", header, rows) + # def handle_datatype_or_language_tag_change(self, value): # # Implement datatype or language tag change handling logic here # logging.info("Datatype or language tag change handling not yet implemented.") @@ -340,6 +413,9 @@ def process_changes(self, curie_or_change: Dict[str, Change]): "EdgeCreation": self.handle_edge_creation, "EdgeChange": self.handle_edge_change, "AddNodeToSubset": self.handle_add_node_to_subset, + "MappingPredicateChange": self.handle_mapping_predicate_change, + "MappingCreation": self.handle_mapping_creation, + "RemoveMapping": self.handle_remove_mapping, # "DatatypeOrLanguageTagChange": self.handle_datatype_or_language_tag_change, # "LanguageTagChange": self.handle_language_tag_change, # "DatatypeChange": self.handle_datatype_change, diff --git a/tests/input/go-nucleus-modified.obo b/tests/input/go-nucleus-modified.obo index 7df9d994d..155604b12 100644 --- a/tests/input/go-nucleus-modified.obo +++ b/tests/input/go-nucleus-modified.obo @@ -377,7 +377,6 @@ subset: goslim_yeast synonym: "cell or subcellular entity" EXACT [] synonym: "cellular component" EXACT [] synonym: "subcellular entity" RELATED [NIF_Subcellular:nlx_subcell_100315] -xref: NIF_Subcellular:sao1337158144 is_a: BFO:0000040 ! material entity disjoint_from: GO:0008150 ! biological_process @@ -422,6 +421,7 @@ synonym: "cell nucleus" EXACT [] synonym: "horsetail nucleus" NARROW [GOC:al, GOC:mah, GOC:vw, PMID:15030757] xref: NIF_Subcellular:sao1702920020 xref: Wikipedia:Cell_nucleus +xref: foo:test is_a: GO:0043231 ! intracellular membrane-bounded organelle disjoint_from: GO:0005737 ! cytoplasm relationship: in_taxon NCBITaxon:2759 ! Eukaryota diff --git a/tests/test_cli.py b/tests/test_cli.py index fc55d3914..399424c10 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -10,7 +10,7 @@ import rdflib import yaml from click.testing import CliRunner -from kgcl_schema.datamodel.kgcl import NodeChange +from kgcl_schema.datamodel.kgcl import MappingCreation, NodeChange, RemoveMapping from linkml_runtime.loaders import json_loader, yaml_loader from oaklib import get_adapter from oaklib.cli import clear_cli_settings, main @@ -1266,6 +1266,16 @@ def test_diffs(self): self.assertTrue( any(c.about_node == "GO:0033673" for c in changes if isinstance(c, NodeChange)) ) + self.assertTrue( + any(c.subject == NUCLEUS for c in changes if isinstance(c, MappingCreation)) + ) + self.assertTrue( + any( + c.about_node == CELLULAR_COMPONENT + for c in changes + if isinstance(c, RemoveMapping) + ) + ) catalytic_activity_changed = any( c.about_node == CATALYTIC_ACTIVITY for c in changes if isinstance(c, NodeChange) ) diff --git a/tests/test_implementations/__init__.py b/tests/test_implementations/__init__.py index 3446c914b..96dadce99 100644 --- a/tests/test_implementations/__init__.py +++ b/tests/test_implementations/__init__.py @@ -1035,6 +1035,18 @@ def test_diff(self, oi: DifferInterface, oi_modified: DifferInterface): predicate="RO:0002212", object="GO:0016301", ), + kgcl.RemoveMapping( + id=FIXED_ID, + about_node=CELLULAR_COMPONENT, + predicate="oio:hasDbXref", + object="NIF_Subcellular:sao1337158144", + ), + kgcl.MappingCreation( + id=FIXED_ID, + subject=NUCLEUS, + predicate="oio:hasDbXref", + object="foo:test", + ), ] for ch in diff: if isinstance(ch, list): @@ -1070,6 +1082,18 @@ def test_diff(self, oi: DifferInterface, oi_modified: DifferInterface): new_value="catalytic activity", ), kgcl.ClassCreation(id=FIXED_ID, about_node="GO:0033673"), + kgcl.MappingCreation( + id=FIXED_ID, + subject=CELLULAR_COMPONENT, + predicate="oio:hasDbXref", + object="NIF_Subcellular:sao1337158144", + ), + kgcl.RemoveMapping( + id=FIXED_ID, + about_node=NUCLEUS, + predicate="oio:hasDbXref", + object="foo:test", + ), ] rdiff = list(oi_modified.diff(oi)) for ch in rdiff: