# Source code for oaklib.interfaces.mapping_provider_interface

import logging
from abc import ABC
from typing import Collection, Dict, Iterable, List, Optional, Union

from deprecation import deprecated
from sssom_schema import Mapping

from oaklib.datamodels.mapping_cluster_datamodel import MappingCluster
from oaklib.interfaces.basic_ontology_interface import BasicOntologyInterface
from oaklib.types import CURIE


class MappingProviderInterface(BasicOntologyInterface, ABC):
    """
    An ontology interface that provides SSSOM mappings.

    For more on the SSSOM Data Model, see `<https://w3id.org/sssom/>`_

    .. note ::

        most ontologies only include minimal metadata about mappings at this time,
        so many fields in the sssom mapping datamodel are not populated.

    The core method in this interface is :py:meth:`sssom_mappings`.

    .. note ::

        This interface is for serving pre-calculated mappings. See :ref:`sssom_utils`
        for on-the-fly mapping creation

    .. note ::

        The default implementations of :py:meth:`sssom_mappings` and
        :py:meth:`get_sssom_mappings_by_curie` call each other, so a concrete
        implementation has to override at least one of them.

    Data Model
    -----------

    The central datamodel used here is `SSSOM <http://w3id.org/sssom>`_

    Command Line Use
    ----------------

    .. code::

        runoak -i bioportal: term-mappings UBERON:0002101 -O sssom -o limb-mappings.sssom.tsv

    """

    @staticmethod
    def _replace_prefix(curie: CURIE, new_prefix: str) -> CURIE:
        """
        Swap the prefix of a CURIE, leaving the local ID untouched.

        Only the text before the first ``:`` is replaced; a plain ``str.replace``
        would also rewrite occurrences of the prefix inside the local ID.

        :param curie: the CURIE whose prefix is to be replaced
        :param new_prefix: the prefix to use instead
        :return: the CURIE with the new prefix
        """
        _, sep, local_id = curie.partition(":")
        return f"{new_prefix}{sep}{local_id}"

    def sssom_mappings(
        self, curies: Optional[Union[CURIE, Iterable[CURIE]]] = None, source: Optional[str] = None
    ) -> Iterable[Mapping]:
        """
        returns all sssom mappings matching filter conditions.

        To fetch all mappings, simply leave the filter conditions empty:

        >>> from oaklib import get_adapter
        >>> adapter = get_adapter("tests/input/go-nucleus.obo")
        >>> for mapping in adapter.sssom_mappings():
        ...     print(mapping.subject_id, mapping.object_id)
        <BLANKLINE>
        ...
        GO:0016772 EC:2.7.-.-
        ...

        To get mappings for a particular term or terms, and to constrain the source
        of the mapping, use the curies and source parameters:

        >>> from oaklib import get_adapter
        >>> adapter = get_adapter("tests/input/go-nucleus.obo")
        >>> for mapping in adapter.sssom_mappings(["GO:0005886"], source="Wikipedia"):
        ...     print(mapping.subject_id, mapping.object_id)
        GO:0005886 Wikipedia:Cell_membrane

        Note you can also lookup from the perspective of the mapped entity:

        >>> from oaklib import get_adapter
        >>> adapter = get_adapter("tests/input/go-nucleus.obo")
        >>> for mapping in adapter.sssom_mappings(["Wikipedia:Cell_membrane"]):
        ...     print(mapping.subject_id, mapping.object_id)
        GO:0005886 Wikipedia:Cell_membrane

        :param curies: [Optional] entity IDs (in ontology or mapped ontology) to filter by;
            either a single CURIE or an iterable of CURIEs. If omitted, all entities
            of the ontology are used.
        :param source: [Optional] only show mappings whose subject or object source
            equals this value
        :return: iterator over the matching mappings
        """
        logging.info("Getting all mappings")
        if curies is None:
            curie_iter = self.entities()
        elif isinstance(curies, CURIE):
            # a single CURIE was passed; treat it as a one-element collection
            curie_iter = [curies]
        else:
            curie_iter = curies
        for curie in curie_iter:
            logging.debug(f"Getting mappings for {curie}")
            for m in self.get_sssom_mappings_by_curie(curie):
                if source and m.object_source != source and m.subject_source != source:
                    continue
                yield m

    # TODO: move packages from mapping-walker
    def inject_mapping_labels(self, mappings: Iterable[Mapping]) -> None:
        """
        Fill in missing subject and object labels on the given mappings, in place.

        Labels are looked up with ``self.label``; labels that are already set
        are left as they are.

        :param mappings: the mappings to update
        """
        for mapping in mappings:
            if not mapping.subject_label:
                mapping.subject_label = self.label(mapping.subject_id)
            if not mapping.object_label:
                mapping.object_label = self.label(mapping.object_id)

    def sssom_mappings_by_source(
        self, subject_or_object_source: Optional[str] = None
    ) -> Iterable[Mapping]:
        """
        All SSSOM mappings in the ontology

        The subject_id MUST be a CURIE in the ontology

        :param subject_or_object_source: [Optional] only yield mappings whose subject
            source or object source equals this value
        :return: iterator over the matching mappings
        """
        logging.info("Getting all mappings")
        for curie in self.entities():
            logging.debug(f"Getting mappings for {curie}")
            for m in self.get_sssom_mappings_by_curie(curie):
                if (
                    subject_or_object_source
                    and m.object_source != subject_or_object_source
                    and m.subject_source != subject_or_object_source
                ):
                    continue
                yield m

    # The message is passed as ``details``: the first positional argument of
    # ``deprecated`` is ``deprecated_in``, which is meant to hold a version.
    @deprecated(details="Replaced by sssom_mappings()")
    def all_sssom_mappings(
        self, subject_or_object_source: Optional[str] = None
    ) -> Iterable[Mapping]:
        """
        Deprecated alias that delegates to :py:meth:`sssom_mappings_by_source`.

        :param subject_or_object_source: [Optional] subject or object source to filter by
        :return: iterator over the matching mappings
        """
        return self.sssom_mappings_by_source(subject_or_object_source)

    @deprecated(details="Use sssom_mappings()")
    def get_sssom_mappings_by_curie(self, *args, **kwargs) -> Iterable[Mapping]:
        """
        All SSSOM mappings about a curie

        MUST yield mappings where EITHER subject OR object equals the CURIE

        :param args: passed through to :py:meth:`sssom_mappings`
        :param kwargs: passed through to :py:meth:`sssom_mappings`
        :return: iterator over the matching mappings
        """
        return self.sssom_mappings(*args, **kwargs)

    def get_transitive_mappings_by_curie(self, curie: CURIE) -> Iterable[Mapping]:
        """
        Not implemented in this base interface.

        :param curie:
        :return:
        :raises NotImplementedError: always, unless overridden
        """
        raise NotImplementedError

    def get_mapping_clusters(self) -> Iterable[MappingCluster]:
        """
        Not implemented in this base interface.

        :return:
        :raises NotImplementedError: always, unless overridden
        """
        raise NotImplementedError

    def normalize(
        self,
        curie: CURIE,
        target_prefixes: List[str],
        source_prefixes: Optional[List[str]] = None,
        strict: bool = False,
    ) -> Optional[CURIE]:
        """
        Normalize a CURIE to a target prefix.

        >>> from oaklib import get_adapter
        >>> adapter = get_adapter("tests/input/go-nucleus.obo")
        >>> adapter.normalize("Wikipedia:Cell_membrane", ["GO"])
        'GO:0005886'

        :param curie: the CURIE to normalize
        :param target_prefixes: the prefixes to normalize to
        :param source_prefixes: the prefixes to normalize from; if given and the
            prefix of the CURIE is not among them (ignoring case), None is returned
        :param strict: if True, raise an error if there is no single mapping to a target prefix
        :return: the normalized CURIE
        :raises ValueError: in strict mode, if zero or more than one mapped ID has a
            target prefix
        """
        if source_prefixes is not None:
            source_prefix = curie.split(":")[0]
            if source_prefix not in source_prefixes:
                canonical_by_lc = {p.lower(): p for p in source_prefixes}
                canonical = canonical_by_lc.get(source_prefix.lower())
                if canonical is None:
                    return None
                # normalize case of the prefix only
                curie = self._replace_prefix(curie, canonical)
        target_prefixes_lc_map = {p.lower(): p for p in target_prefixes}
        normalized_ids = []
        for m in self.sssom_mappings(curie):
            # take whichever side of the mapping is not the query CURIE
            object_id = m.object_id
            if object_id.lower() == curie.lower():
                object_id = m.subject_id
            object_prefix = object_id.split(":")[0]
            if object_prefix.lower() not in target_prefixes_lc_map:
                continue
            if object_prefix not in target_prefixes:
                # case mismatch; normalize case of the prefix only
                object_id = self._replace_prefix(
                    object_id, target_prefixes_lc_map[object_prefix.lower()]
                )
            if not strict:
                # non-strict mode can stop at the first hit
                return object_id
            normalized_ids.append(object_id)
        if strict:
            if len(normalized_ids) == 1:
                return normalized_ids[0]
            raise ValueError(f"{curie} no single ID in {target_prefixes}; N={normalized_ids}")
        # Nothing found: retry once with the lower-cased CURIE. source_prefixes is
        # deliberately not passed on, since it would restore the original casing.
        # NOTE(review): if the retry finds nothing either, the lower-cased CURIE is
        # returned rather than the input as given - confirm this is intended.
        curie_lc = curie.lower()
        if curie_lc != curie:
            return self.normalize(curie_lc, target_prefixes, strict=strict)
        return curie

    def normalize_prefix(
        self,
        curie: CURIE,
        prefixes: Optional[Collection[str]] = None,
        prefix_alias_map: Optional[Dict[str, str]] = None,
    ) -> CURIE:
        """
        Normalize a CURIE to a target prefix.

        If a prefix alias map is supplied, this takes precedence:

        >>> from oaklib import get_adapter
        >>> adapter = get_adapter("tests/input/go-nucleus.obo")
        >>> adapter.normalize_prefix("uniprot:P12345", prefix_alias_map={"uniprot": "UniProtKB"})
        'UniProtKB:P12345'

        If no prefix alias map is supplied, then the prefix is mapped to the
        preferred casing determined by the supplied prefixes:

        >>> from oaklib import get_adapter
        >>> adapter = get_adapter("tests/input/go-nucleus.obo")
        >>> adapter.normalize_prefix("go:0000001", prefixes=["GO"])
        'GO:0000001'

        Only the prefix is rewritten; the local part of the CURIE is never changed.

        :param curie: the CURIE to normalize
        :param prefixes: the prefixes to normalize to
        :param prefix_alias_map: a map of prefix aliases to prefixes
        :return: the normalized CURIE
        """
        prefix = curie.split(":")[0]
        if prefix_alias_map:
            # prefix alias map takes priority; prefixes is ignored in this case
            if prefix in prefix_alias_map:
                return self._replace_prefix(curie, prefix_alias_map[prefix])
            return curie
        if not prefixes or prefix in prefixes:
            return curie
        # no alias map: use an implicit alias map
        # that maps *any* casing to the preferred case
        prefixes_lc = {p.lower(): p for p in prefixes}
        preferred = prefixes_lc.get(prefix.lower())
        if preferred is None:
            return curie
        return self._replace_prefix(curie, preferred)

    def create_normalization_map(
        self,
        curies: Optional[Iterable[CURIE]] = None,
        source_prefixes: Optional[Collection[str]] = None,
        target_prefixes: Optional[Collection[str]] = None,
        prefix_alias_map: Optional[Dict[str, str]] = None,
    ) -> Dict[CURIE, CURIE]:
        """
        Create a normalization map for a set of CURIEs.

        This map can then be used to map IDs from one prefix space to another.

        For each curie in curies, find a mapping to a target prefix, and add it to the map.

        >>> from oaklib import get_adapter
        >>> adapter = get_adapter("tests/input/go-nucleus.obo")
        >>> nmap = adapter.create_normalization_map(source_prefixes=["GO"], target_prefixes=["Wikipedia"])
        >>> nmap["GO:0005634"]
        'Wikipedia:Cell_nucleus'

        You can also pass in an explicit prefix alias map:

        >>> from oaklib import get_adapter
        >>> adapter = get_adapter("tests/input/go-nucleus.obo")
        >>> nmap = adapter.create_normalization_map(source_prefixes=["GO"], target_prefixes=["WIKIPEDIA"],
        ...                                         prefix_alias_map={"Wikipedia": "WIKIPEDIA"})
        >>> nmap["GO:0005634"]
        'WIKIPEDIA:Cell_nucleus'

        >>> from oaklib import get_adapter
        >>> adapter = get_adapter("tests/input/go-nucleus.obo")
        >>> nmap = adapter.create_normalization_map(source_prefixes=["go"], target_prefixes=["WIKIPEDIA"],
        ...                                         prefix_alias_map={"Wikipedia": "WIKIPEDIA", "GO": "go"})
        >>> nmap["go:0005634"]
        'WIKIPEDIA:Cell_nucleus'

        >>> from oaklib import get_adapter
        >>> adapter = get_adapter("tests/input/go-nucleus.obo")
        >>> nmap = adapter.create_normalization_map(["go:0005634"], source_prefixes=["go"],
        ...                                         target_prefixes=["WIKIPEDIA"],
        ...                                         prefix_alias_map={"Wikipedia": "WIKIPEDIA", "GO": "go"})
        >>> nmap["go:0005634"]
        'WIKIPEDIA:Cell_nucleus'

        :param curies: the CURIEs to build the map for; if omitted, all entities
            of the ontology are used
        :param source_prefixes: prefixes of the input CURIEs; used to normalize the
            input to the form used in this adapter
        :param target_prefixes: prefixes to map to (matched ignoring case); if
            omitted, mapped IDs are not filtered by prefix
        :param prefix_alias_map: maps from prefixes used in the adapter to desired prefixes
        :return: dictionary from (normalized) input CURIE to (normalized) mapped CURIE
        """
        # create a reverse map, that maps from the desired prefixes to the prefixes
        # used in the adapter
        if prefix_alias_map is not None:
            reverse_prefix_alias_map = {v: k for k, v in prefix_alias_map.items()}
        else:
            reverse_prefix_alias_map = None
        if curies is None:
            # TODO: use source_prefixes
            curies = list(self.entities())
        else:
            # map input to the form used in this adapter
            curies = [
                self.normalize_prefix(c, source_prefixes, reverse_prefix_alias_map)
                for c in curies
            ]
        known_ids = set(curies)
        # case-neutral form of target prefixes; None means "accept any prefix"
        if target_prefixes is None:
            target_prefixes_lc = None
        else:
            target_prefixes_lc = {p.lower() for p in target_prefixes}

        normalization_map = {}
        for mapping in self.sssom_mappings(curies):
            # orient the mapping so that `own_id` is the requested CURIE
            if mapping.subject_id in known_ids:
                own_id, other_id = mapping.subject_id, mapping.object_id
            elif mapping.object_id in known_ids:
                own_id, other_id = mapping.object_id, mapping.subject_id
            else:
                continue
            other_prefix = other_id.split(":")[0]
            if target_prefixes_lc is None or other_prefix.lower() in target_prefixes_lc:
                normalization_map[own_id] = other_id
        # convert both sides from adapter prefixes to the desired prefixes
        return {
            self.normalize_prefix(k, target_prefixes, prefix_alias_map): self.normalize_prefix(
                v, target_prefixes, prefix_alias_map
            )
            for k, v in normalization_map.items()
        }