# Source code for oaklib.interfaces.validator_interface

import logging
from abc import ABC
from typing import Any, Dict, Iterable, List, Optional, Union

from linkml_runtime.dumpers import json_dumper

from oaklib.datamodels.ontology_metadata import DefinitionConstraintComponent
from oaklib.datamodels.synonymizer_datamodel import RuleSet
from oaklib.datamodels.validation_datamodel import (
    MappingValidationResult,
    RepairConfiguration,
    RepairOperation,
    SeverityOptions,
    ValidationConfiguration,
    ValidationResult,
)
from oaklib.datamodels.vocabulary import HAS_DEFINITION_CURIE
from oaklib.interfaces import MappingProviderInterface, OboGraphInterface
from oaklib.interfaces.basic_ontology_interface import BasicOntologyInterface
from oaklib.types import CURIE
from oaklib.utilities.iterator_utils import chunk
from oaklib.utilities.lexical.synonymizer import apply_synonymizer
from oaklib.utilities.publication_utils.pubmed_wrapper import PubmedWrapper


class ValidatorInterface(BasicOntologyInterface, ABC):
    """
    Multi-faceted ontology validator.

    This interface defines a number of different validation processes.

    The overall goal is to support the following:

    - validating against OntologyMetadata schema
    - lexical checks
    - wrapping reasoning
    - structural graph checks

    Specific implementations may choose to implement efficient methods for this.
    For example, a SQL implementation can quickly determine all terms missing
    definitions with a query over an indexed table.

    Currently the main implementation for this is the SqlDatabase implementation;
    this implements a generic property check using the OntologyMetadata datamodel.

    See:

    - `OntologyMetadata <https://incatools.github.io/ontology-access-kit/datamodels/ontology-metadata/>`_
    """

    def term_curies_without_definitions(self) -> Iterable[CURIE]:
        """
        Yields all entities that do not have a definition.

        :return: iterator over CURIEs for which ``self.definition`` returns None
        """
        # Implementations are advised to implement more efficient interfaces for their back-end
        for curie in self.entities():
            if self.definition(curie) is None:
                yield curie

    def validate(
        self, configuration: Optional[ValidationConfiguration] = None
    ) -> Iterable[ValidationResult]:
        """
        Validate entire ontology or wrapped ontologies.

        Validation results might be implementation specific

        - reasoners will yield logical problems
        - shape checkers or schema checkers like SHACL or LinkML will return
          closed-world structural violations
        - specialized implementations may yield lexical or other kinds of problems

        :param configuration: validation configuration
        :return: iterator over validation results
        :raises NotImplementedError: always, in this base interface
        """
        raise NotImplementedError

    def validate_mappings(
        self,
        entities: Optional[Iterable[CURIE]] = None,
        adapters: Optional[Dict[str, BasicOntologyInterface]] = None,
        configuration: Optional[ValidationConfiguration] = None,
    ) -> Iterable[MappingValidationResult]:
        """
        Validate mappings for a set of entities.

        Different adapters may implement different aspects of mapping validation. It includes:

        - checking cardinality of mappings (e.g. skos:exactMatch should be 1:1)
        - looking up mapped entities to check they are not obsolete
        - using AI to validate the content of mappings

        :param entities: entities to validate mappings for (None=all)
        :param adapters: adapter mapping to look up external entities
        :param configuration: validation configuration (not used by this default implementation)
        :return: iterator yielding one result per error reported for a mapping
        :raises ValueError: if this object is not a MappingProviderInterface
        """
        # Imported locally, as in the original; presumably to avoid a circular import.
        from oaklib.utilities.mapping.mapping_validation import validate_mappings

        if not isinstance(self, MappingProviderInterface):
            raise ValueError(f"Cannot validate mappings on {self}")
        mappings = list(self.sssom_mappings(entities))
        for errors, m in validate_mappings(mappings, adapters=adapters):
            for error in errors:
                yield MappingValidationResult(
                    subject_id=m.subject_id,
                    object_id=m.object_id,
                    predicate_id=m.predicate_id,
                    info=error,
                )

    def validate_synonyms(
        self,
        entities: Optional[Iterable[CURIE]] = None,
        adapters: Optional[Dict[str, BasicOntologyInterface]] = None,
        configuration: Optional[ValidationConfiguration] = None,
        synonymizer_rules: Optional[RuleSet] = None,
    ) -> Iterable[ValidationResult]:
        """
        Validate synonyms for a set of entities.

        Different adapters may implement different aspects of synonym validation. It includes:

        - checking for duplicates
        - looking up mapped entities to check they are not obsolete
        - ensuring that a referenced synonym is still supported
        - using AI to validate the content of synonyms

        This default implementation checks, for every synonym that carries xrefs,
        that the synonym text (compared case-insensitively, optionally expanded by
        the synonymizer rules) is present among the synonyms of the xref'd entity
        in the corresponding external adapter.

        :param entities: entities to validate synonyms for (None=all)
        :param adapters: adapter mapping to look up external entities
        :param configuration: validation configuration (not used by this default implementation)
        :param synonymizer_rules: optional rules used to generate alternative synonym forms
        :return: iterator over ``oio:SynonymNotFound`` results
        :raises ValueError: if this object, or a resolved external adapter,
            is not an OboGraphInterface
        """
        # Imported locally, as in the original; hoisted out of the innermost loop.
        from oaklib.utilities.mapping.mapping_validation import lookup_mapping_adapter

        if not isinstance(self, OboGraphInterface):
            raise ValueError(f"Cannot validate synonyms on {self}")
        if entities is None:
            # Honour the documented "None=all" contract instead of failing on iteration.
            entities = self.entities()
        nodes = [self.node(n, include_metadata=True) for n in entities]
        for node in nodes:
            if node is None or node.meta is None:
                continue
            for syn in node.meta.synonyms or []:
                if not syn.xrefs:
                    continue
                for xref in syn.xrefs:
                    ext_adapter = lookup_mapping_adapter(xref, adapters)
                    if ext_adapter is None:
                        continue
                    if not isinstance(ext_adapter, OboGraphInterface):
                        raise ValueError(f"Cannot validate synonyms on {ext_adapter}")
                    ext_node = ext_adapter.node(xref, include_metadata=True)
                    # A missing external node (or one without metadata) is treated as
                    # having no synonyms, so the synonym is reported as not found
                    # rather than raising AttributeError.
                    if ext_node is None or ext_node.meta is None:
                        ext_syns = []
                    else:
                        ext_syns = ext_node.meta.synonyms or []
                    # normalize to lower case for comparison
                    # TODO: allow configurability of case rules
                    synonym_forms = {syn.val.lower()}
                    if synonymizer_rules:
                        for _, syn_form, _ in apply_synonymizer(syn.val, synonymizer_rules.rules):
                            synonym_forms.add(syn_form.lower())
                    ext_syn_vals = [x.val.lower() for x in ext_syns]
                    if not synonym_forms.intersection(ext_syn_vals):
                        yield ValidationResult(
                            subject=node.id,
                            predicate=syn.pred,
                            object=xref,
                            object_str=syn.val,
                            severity=SeverityOptions(SeverityOptions.ERROR),
                            type="oio:SynonymNotFound",
                            info=f"synonym not found in {xref}",
                        )

    def validate_definitions(
        self,
        entities: Optional[Iterable[CURIE]] = None,
        adapters: Optional[Dict[str, BasicOntologyInterface]] = None,
        configuration: Optional[ValidationConfiguration] = None,
        skip_text_annotation=False,
        **kwargs,
    ) -> Iterable[ValidationResult]:
        """
        Validate text definitions for a set of entities.

        Different adapters may implement different aspects of definition validation. It includes:

        - testing definitions are present
        - validating the text definition against the SRS rubric
        - using AI to align the definition against any references

        Results from the definition rule are yielded first. If
        ``configuration.lookup_references`` is set, the references attached to each
        definition are then looked up, and missing or retracted publications are reported.

        :param entities: entities to validate (None or empty = all non-obsolete entities)
        :param adapters: adapter mapping to look up external entities
        :param configuration: validation configuration
        :param skip_text_annotation: passed through to the DefinitionOntologyRule
        :param kwargs: additional arguments (not used by this default implementation)
        :return: iterator over validation results
        """
        # Imported locally, as in the original; presumably to avoid a circular import.
        from oaklib.utilities.validation.definition_ontology_rule import DefinitionOntologyRule

        definition_rule = DefinitionOntologyRule(skip_text_annotation=skip_text_annotation)
        # Always materialise: the entities are traversed twice (rule evaluation, then
        # reference lookup), and self.entities() may return a single-pass iterator.
        entities = list(entities) if entities else list(self.entities(filter_obsoletes=True))

        def _contract(url):
            # Shorten the DCC URL prefix to its prefixed form.
            return url.replace("https://w3id.org/oak/ontology-metadata/DCC.", "oaklib.om:DCC#")

        for r in definition_rule.evaluate(self, entities=entities):
            r.type = _contract(r.type)
            yield r
        if not (configuration and configuration.lookup_references):
            return
        for entity_it in chunk(entities):
            for entity, defn, metadata in self.definitions(entity_it, include_metadata=True):
                if not metadata:
                    continue
                for _k, vs in metadata.items():
                    refs = self.lookup_references(vs, adapters=adapters)
                    for ref, obj in refs.items():
                        # A falsy, non-None value marks a reference that was looked up
                        # but not found (see lookup_references).
                        if obj is not None and not obj:
                            yield ValidationResult(
                                subject=entity,
                                predicate=HAS_DEFINITION_CURIE,
                                object=ref,
                                object_str=defn,
                                severity=SeverityOptions(SeverityOptions.ERROR),
                                type=_contract(
                                    DefinitionConstraintComponent.ReferenceNotFound.meaning
                                ),
                                info=f"publication not found: {ref}",
                            )
                            continue
                        if isinstance(obj, dict) and obj.get("retracted", False):
                            yield ValidationResult(
                                subject=entity,
                                predicate=HAS_DEFINITION_CURIE,
                                object=ref,
                                severity=SeverityOptions(SeverityOptions.ERROR),
                                type=_contract(
                                    DefinitionConstraintComponent.ReferenceIsRetracted.meaning
                                ),
                                info=f"publication is retracted: {obj.get('title', obj)}",
                            )

    def repair(
        self, configuration: Optional[RepairConfiguration] = None, dry_run=False
    ) -> Iterable[RepairOperation]:
        """
        Finds problems and fixes them.

        :param configuration: repair configuration
        :param dry_run: dry-run flag; semantics are left to implementations
        :return: iterator over repair operations
        :raises NotImplementedError: always, in this base interface
        """
        raise NotImplementedError

    def check_external_references(self):
        """
        Check external references.

        :raises NotImplementedError: always, in this base interface
        """
        raise NotImplementedError

    def is_coherent(self) -> bool:
        """
        True if the ontology is logically coherent, as determined by deductive reasoning
        (e.g. an OWL reasoner)

        :return: true if coherent
        :raises NotImplementedError: always, in this base interface
        """
        raise NotImplementedError

    def unsatisfiable_classes(self, exclude_nothing=True) -> Iterable[CURIE]:
        """
        Yields all classes that are unsatisfiable, as determined by deductive reasoning
        (e.g. an OWL reasoner)

        :param exclude_nothing: if True (default) do not include the tautological owl:Nothing
        :return: class curie iterator
        :raises NotImplementedError: always, in this base interface
        """
        raise NotImplementedError

    def lookup_references(
        self,
        references: List[CURIE],
        adapters: Optional[Dict[str, BasicOntologyInterface]] = None,
        **kwargs,
    ) -> Dict[CURIE, Union[bool, Dict[str, Any]]]:
        """
        Lookup references in external ontologies.

        References whose prefix is a publication prefix (PMID, DOI, PMC, PMCID;
        case-insensitive) are passed to the PubMed wrapper. All other references are
        resolved through ``adapters``, keyed by CURIE prefix. References that are
        null or have no ``prefix:`` part are ignored.

        :param references: list of CURIEs to look up
        :param adapters: adapter mapping to look up external entities
        :param kwargs: additional arguments (not used by this default implementation)
        :return: mapping between CURIEs and pub objects; a value of False marks a
            PMID reference for which no object was returned
        """
        if None in references:
            logging.warning("Null in references: %s", references)
            references = [r for r in references if r]
        pubmed_wrapper = PubmedWrapper()
        pub_prefixes = ["PMID", "DOI", "PMC", "PMCID"]
        publication_ids = [
            x for x in references if any(x.upper().startswith(y) for y in pub_prefixes)
        ]
        objs = pubmed_wrapper.objects_by_ids(publication_ids)
        # Seed PMIDs with False ("not found"); entries are overwritten below for every
        # object that was actually returned. Only PMIDs are seeded, so only PMIDs can
        # be reported as missing.
        obj_map = {pmid: False for pmid in references if pmid.startswith("PMID")}
        for x in objs:
            obj_map[x["id"]] = x
        for x in references:
            # split out the prefix; values without a colon are not CURIEs and are skipped
            prefix, sep, _ = x.partition(":")
            if not sep:
                continue
            if prefix.upper() in pub_prefixes:
                continue
            if adapters is not None and prefix in adapters:
                adapter = adapters[prefix]
                if not isinstance(adapter, OboGraphInterface):
                    continue
                obj = adapter.node(x)
                obj_map[x] = json_dumper.to_dict(obj)
        return obj_map