# Source code for oaklib.datamodels.text_annotator

# Auto generated from text_annotator.yaml by pythongen.py version: 0.0.1
# Generation date: 2023-09-16T18:49:46
# Schema: text-annotator
#
# id: https://w3id.org/oak/text_annotator
# description: A datamodel for representing the results of textual named entity recognition annotation. This draws upon both SSSOM and https://www.w3.org/TR/annotation-model/
# license: https://creativecommons.org/publicdomain/zero/1.0/

import dataclasses
import re
from dataclasses import dataclass
from typing import Any, ClassVar, Dict, List, Optional, Union

from jsonasobj2 import JsonObj, as_dict
from linkml_runtime.linkml_model.meta import (
    EnumDefinition,
    PermissibleValue,
    PvFormulaOptions,
)
from linkml_runtime.linkml_model.types import (
    Boolean,
    Float,
    Integer,
    String,
    Uriorcurie,
)
from linkml_runtime.utils.curienamespace import CurieNamespace
from linkml_runtime.utils.dataclass_extensions_376 import (
    dataclasses_init_fn_with_kwargs,
)
from linkml_runtime.utils.enumerations import EnumDefinitionImpl
from linkml_runtime.utils.formatutils import camelcase, sfx, underscore
from linkml_runtime.utils.metamodelcore import (
    Bool,
    URIorCURIE,
    bnode,
    empty_dict,
    empty_list,
)
from linkml_runtime.utils.slot import Slot
from linkml_runtime.utils.yamlutils import (
    YAMLRoot,
    extended_float,
    extended_int,
    extended_str,
)
from rdflib import Namespace, URIRef

# Module-level version markers emitted by the generator.
# NOTE(review): presumably the LinkML metamodel version this file was generated against — confirm.
metamodel_version = "1.7.0"
# No version value is recorded for this module.
version = None

# Overwrite dataclasses _init_fn to add **kwargs in __init__
# This rebinds a private attribute of the standard-library ``dataclasses`` module itself,
# so it is an import-time side effect that is not limited to the classes in this file.
# NOTE(review): presumably this has to run before the @dataclass-decorated classes below
# are defined so that their generated __init__ uses the replacement — confirm before moving it.
dataclasses._init_fn = dataclasses_init_fn_with_kwargs

# Namespaces
# Each constant pairs a CURIE prefix (first argument) with the IRI base it expands to
# (second argument). Further down, attribute access such as ``ANN.Position`` and calls such
# as ``ANN.curie("id")`` are used to build the URIs and CURIEs attached to classes and slots.
# Only ANN, OA, SSSOM and XSD are referenced in the visible part of this module.
# NOTE(review): the header gives the schema id as https://w3id.org/oak/text_annotator, while
# the ``ann`` prefix expands under https://w3id.org/linkml/text_annotator/ — confirm this is intended.
ANN = CurieNamespace("ann", "https://w3id.org/linkml/text_annotator/")
BPA = CurieNamespace("bpa", "https://bioportal.bioontology.org/annotator/")
LINKML = CurieNamespace("linkml", "https://w3id.org/linkml/")
OA = CurieNamespace("oa", "http://www.w3.org/ns/oa#")
OWL = CurieNamespace("owl", "http://www.w3.org/2002/07/owl#")
PAV = CurieNamespace("pav", "http://purl.org/pav/")
PROV = CurieNamespace("prov", "http://www.w3.org/ns/prov#")
RDF = CurieNamespace("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
RDFS = CurieNamespace("rdfs", "http://www.w3.org/2000/01/rdf-schema#")
SCHEMA = CurieNamespace("schema", "http://schema.org/")
# NOTE(review): the W3C SHACL vocabulary is usually published at http://www.w3.org/ns/shacl# —
# confirm that this w3id.org IRI is the intended expansion for ``sh``.
SH = CurieNamespace("sh", "https://w3id.org/shacl/")
SKOS = CurieNamespace("skos", "http://www.w3.org/2004/02/skos/core#")
SSSOM = CurieNamespace("sssom", "http://w3id.org/sssom/")
XSD = CurieNamespace("xsd", "http://www.w3.org/2001/XMLSchema#")
# The module's default namespace is the ``ann`` namespace defined above.
DEFAULT_ = ANN


# Types
class Position(Integer):
    """
    Integer-valued type used for the ``subject_start`` / ``subject_end`` slots.

    It is a named subclass of ``Integer`` that carries its own model URI while
    keeping the ``xsd:integer`` type URI and CURIE.
    """

    # Identity of this type within the ann model
    type_name = "Position"
    type_model_uri = ANN.Position

    # XSD datatype this type is declared as
    type_class_curie = "xsd:integer"
    type_class_uri = XSD.integer


# Class references
class TextualElementId(URIorCURIE):
    """
    Identifier type for ``TextualElement``.

    Adds nothing to ``URIorCURIE``; it exists so that references to a textual
    element (``TextualElement.id``, ``subject_text_id``) have a distinct type.
    """


@dataclass
class TextAnnotationConfiguration(YAMLRoot):
    """
    Configuration for search.

    All slots are optional. After construction, ``__post_init__`` coerces every
    supplied value to its declared range; for the multivalued slots (``sources``,
    ``token_exclusion_list``, ``categories``) a lone value is wrapped in a list
    and ``None`` becomes an empty list.
    """

    _inherited_slots: ClassVar[List[str]] = []

    class_class_uri: ClassVar[URIRef] = ANN.TextAnnotationConfiguration
    class_class_curie: ClassVar[str] = "ann:TextAnnotationConfiguration"
    class_name: ClassVar[str] = "TextAnnotationConfiguration"
    class_model_uri: ClassVar[URIRef] = ANN.TextAnnotationConfiguration

    # Field order is part of the generated __init__ signature; do not reorder.
    matches_whole_text: Optional[Union[bool, Bool]] = None
    sources: Optional[Union[str, List[str]]] = empty_list()
    limit: Optional[int] = None
    token_exclusion_list: Optional[Union[str, List[str]]] = empty_list()
    categories: Optional[Union[str, List[str]]] = empty_list()
    model: Optional[str] = None
    include_aliases: Optional[Union[bool, Bool]] = None

    def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
        """Normalise each slot in declaration order, then defer to ``YAMLRoot``."""
        whole_text = self.matches_whole_text
        if not (whole_text is None or isinstance(whole_text, Bool)):
            self.matches_whole_text = Bool(whole_text)

        # sources: None -> [], lone value -> [value], then stringify non-str members
        if self.sources is None:
            self.sources = []
        elif not isinstance(self.sources, list):
            self.sources = [self.sources]
        self.sources = [str(item) if not isinstance(item, str) else item for item in self.sources]

        max_results = self.limit
        if not (max_results is None or isinstance(max_results, int)):
            self.limit = int(max_results)

        # token_exclusion_list: same list normalisation as sources
        if self.token_exclusion_list is None:
            self.token_exclusion_list = []
        elif not isinstance(self.token_exclusion_list, list):
            self.token_exclusion_list = [self.token_exclusion_list]
        self.token_exclusion_list = [
            str(item) if not isinstance(item, str) else item
            for item in self.token_exclusion_list
        ]

        # categories: same list normalisation as sources
        if self.categories is None:
            self.categories = []
        elif not isinstance(self.categories, list):
            self.categories = [self.categories]
        self.categories = [
            str(item) if not isinstance(item, str) else item for item in self.categories
        ]

        model_name = self.model
        if not (model_name is None or isinstance(model_name, str)):
            self.model = str(model_name)

        with_aliases = self.include_aliases
        if not (with_aliases is None or isinstance(with_aliases, Bool)):
            self.include_aliases = Bool(with_aliases)

        # Extra keyword arguments are passed through to the base class.
        super().__post_init__(**kwargs)


@dataclass
class TextAnnotationResultSet(YAMLRoot):
    """
    A collection of annotation results.

    ``annotations`` may be given as a single item or a list; each item that is
    not already a ``TextAnnotation`` is converted into one by ``__post_init__``.
    """

    _inherited_slots: ClassVar[List[str]] = []

    class_class_uri: ClassVar[URIRef] = ANN.TextAnnotationResultSet
    class_class_curie: ClassVar[str] = "ann:TextAnnotationResultSet"
    class_name: ClassVar[str] = "TextAnnotationResultSet"
    class_model_uri: ClassVar[URIRef] = ANN.TextAnnotationResultSet

    annotations: Optional[
        Union[Union[dict, "TextAnnotation"], List[Union[dict, "TextAnnotation"]]]
    ] = empty_list()

    def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
        """Turn ``annotations`` into a list of ``TextAnnotation`` objects."""
        # None -> [], lone value -> [value]
        if self.annotations is None:
            self.annotations = []
        elif not isinstance(self.annotations, list):
            self.annotations = [self.annotations]

        # Anything that is not yet a TextAnnotation is expanded via as_dict()
        # and used as keyword arguments for the TextAnnotation constructor.
        self.annotations = [
            TextAnnotation(**as_dict(entry)) if not isinstance(entry, TextAnnotation) else entry
            for entry in self.annotations
        ]

        super().__post_init__(**kwargs)


@dataclass
class TextualElement(YAMLRoot):
    """
    A textual element identified by ``id``.

    ``id`` is the only required slot: although it defaults to ``None`` in the
    signature, ``__post_init__`` reports it as missing when it is empty.
    """

    _inherited_slots: ClassVar[List[str]] = []

    class_class_uri: ClassVar[URIRef] = ANN.TextualElement
    class_class_curie: ClassVar[str] = "ann:TextualElement"
    class_name: ClassVar[str] = "TextualElement"
    class_model_uri: ClassVar[URIRef] = ANN.TextualElement

    id: Union[str, TextualElementId] = None
    text: Optional[str] = None
    source_text: Optional[str] = None
    parent_document: Optional[Union[str, URIorCURIE]] = None

    def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
        """Validate the required ``id`` and coerce the remaining slots."""
        # Required-slot check comes first.
        # NOTE(review): MissingRequiredField presumably raises — confirm in YAMLRoot.
        if self._is_empty(self.id):
            self.MissingRequiredField("id")
        if not isinstance(self.id, TextualElementId):
            self.id = TextualElementId(self.id)

        body = self.text
        if not (body is None or isinstance(body, str)):
            self.text = str(body)

        original = self.source_text
        if not (original is None or isinstance(original, str)):
            self.source_text = str(original)

        parent = self.parent_document
        if not (parent is None or isinstance(parent, URIorCURIE)):
            self.parent_document = URIorCURIE(parent)

        super().__post_init__(**kwargs)


@dataclass
class HasSpan(YAMLRoot):
    """
    Holder for the five ``subject_*`` slots: start, end, label, source and text id.

    All slots are optional; ``__post_init__`` coerces each supplied value to
    its declared range (``Position``, ``str`` or ``TextualElementId``).
    """

    _inherited_slots: ClassVar[List[str]] = []

    class_class_uri: ClassVar[URIRef] = ANN.HasSpan
    class_class_curie: ClassVar[str] = "ann:HasSpan"
    class_name: ClassVar[str] = "HasSpan"
    class_model_uri: ClassVar[URIRef] = ANN.HasSpan

    subject_start: Optional[Union[int, Position]] = None
    subject_end: Optional[Union[int, Position]] = None
    subject_label: Optional[str] = None
    subject_source: Optional[str] = None
    subject_text_id: Optional[Union[str, TextualElementId]] = None

    def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
        """Coerce every supplied slot, in declaration order."""
        start = self.subject_start
        if not (start is None or isinstance(start, Position)):
            self.subject_start = Position(start)

        end = self.subject_end
        if not (end is None or isinstance(end, Position)):
            self.subject_end = Position(end)

        label = self.subject_label
        if not (label is None or isinstance(label, str)):
            self.subject_label = str(label)

        source = self.subject_source
        if not (source is None or isinstance(source, str)):
            self.subject_source = str(source)

        text_id = self.subject_text_id
        if not (text_id is None or isinstance(text_id, TextualElementId)):
            self.subject_text_id = TextualElementId(text_id)

        super().__post_init__(**kwargs)


@dataclass
class TextAnnotation(YAMLRoot):
    """
    An individual text annotation.

    The class URI is ``oa:Annotation``. Alongside its own slots it declares the
    same five ``subject_*`` slots as ``HasSpan``. All slots are optional;
    ``__post_init__`` coerces each supplied value to its declared range, and for
    the multivalued slots (``object_categories``, ``object_aliases``) wraps a
    lone value in a list and turns ``None`` into an empty list.
    """

    _inherited_slots: ClassVar[List[str]] = []

    class_class_uri: ClassVar[URIRef] = OA.Annotation
    class_class_curie: ClassVar[str] = "oa:Annotation"
    class_name: ClassVar[str] = "TextAnnotation"
    class_model_uri: ClassVar[URIRef] = ANN.TextAnnotation

    # Field order is part of the generated __init__ signature; do not reorder.
    predicate_id: Optional[str] = None
    object_id: Optional[str] = None
    object_label: Optional[str] = None
    object_categories: Optional[Union[str, List[str]]] = empty_list()
    object_source: Optional[str] = None
    confidence: Optional[float] = None
    match_string: Optional[str] = None
    is_longest_match: Optional[Union[bool, Bool]] = None
    matches_whole_text: Optional[Union[bool, Bool]] = None
    match_type: Optional[str] = None
    info: Optional[str] = None
    object_aliases: Optional[Union[str, List[str]]] = empty_list()
    # The five slots below repeat those declared on HasSpan.
    subject_start: Optional[Union[int, Position]] = None
    subject_end: Optional[Union[int, Position]] = None
    subject_label: Optional[str] = None
    subject_source: Optional[str] = None
    subject_text_id: Optional[Union[str, TextualElementId]] = None

    def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
        """Coerce every slot in declaration order, then defer to ``YAMLRoot``.

        Extra keyword arguments are passed through to the base class.
        """
        if self.predicate_id is not None and not isinstance(self.predicate_id, str):
            self.predicate_id = str(self.predicate_id)

        if self.object_id is not None and not isinstance(self.object_id, str):
            self.object_id = str(self.object_id)

        if self.object_label is not None and not isinstance(self.object_label, str):
            self.object_label = str(self.object_label)

        # Multivalued: None -> [], lone value -> [value], then stringify non-str members.
        if not isinstance(self.object_categories, list):
            self.object_categories = (
                [self.object_categories] if self.object_categories is not None else []
            )
        self.object_categories = [
            v if isinstance(v, str) else str(v) for v in self.object_categories
        ]

        if self.object_source is not None and not isinstance(self.object_source, str):
            self.object_source = str(self.object_source)

        if self.confidence is not None and not isinstance(self.confidence, float):
            self.confidence = float(self.confidence)

        if self.match_string is not None and not isinstance(self.match_string, str):
            self.match_string = str(self.match_string)

        if self.is_longest_match is not None and not isinstance(self.is_longest_match, Bool):
            self.is_longest_match = Bool(self.is_longest_match)

        if self.matches_whole_text is not None and not isinstance(self.matches_whole_text, Bool):
            self.matches_whole_text = Bool(self.matches_whole_text)

        if self.match_type is not None and not isinstance(self.match_type, str):
            self.match_type = str(self.match_type)

        if self.info is not None and not isinstance(self.info, str):
            self.info = str(self.info)

        # Multivalued: same normalisation as object_categories.
        if not isinstance(self.object_aliases, list):
            self.object_aliases = [self.object_aliases] if self.object_aliases is not None else []
        self.object_aliases = [v if isinstance(v, str) else str(v) for v in self.object_aliases]

        if self.subject_start is not None and not isinstance(self.subject_start, Position):
            self.subject_start = Position(self.subject_start)

        if self.subject_end is not None and not isinstance(self.subject_end, Position):
            self.subject_end = Position(self.subject_end)

        if self.subject_label is not None and not isinstance(self.subject_label, str):
            self.subject_label = str(self.subject_label)

        if self.subject_source is not None and not isinstance(self.subject_source, str):
            self.subject_source = str(self.subject_source)

        if self.subject_text_id is not None and not isinstance(
            self.subject_text_id, TextualElementId
        ):
            self.subject_text_id = TextualElementId(self.subject_text_id)

        super().__post_init__(**kwargs)
# Enumerations
class TransformationType(EnumDefinitionImpl):
    """
    A controlled datamodels of the types of transformation that can be applied to
    """

    # NOTE(review): the description above (repeated in ``_defn`` below) ends mid-sentence, and
    # ``TermExpanson`` looks like a misspelling of "TermExpansion". Both are reproduced
    # unchanged because they are runtime values/names — confirm against the schema before
    # changing them.
    Stemming = PermissibleValue(
        text="Stemming",
        description="Removal of the last few characters of a word to yield a stem term for each word in the term",
    )
    Lemmatization = PermissibleValue(
        text="Lemmatization",
        description="Contextual reduction of a word to its base form for each word in the term",
    )
    WordOrderNormalization = PermissibleValue(
        text="WordOrderNormalization",
        description="reorder words in the term to a standard order such that comparisons are order-independent",
    )
    Depluralization = PermissibleValue(
        text="Depluralization",
        description="Transform plural form to singular form for each word in a term",
    )
    CaseNormalization = PermissibleValue(
        text="CaseNormalization",
        description="Transform term to a standard case, typically lowercase",
    )
    WhitespaceNormalization = PermissibleValue(
        text="WhitespaceNormalization",
        description="Trim whitespace, condense whitespace runs, and transform all non-space whitespace to spaces",
    )
    TermExpanson = PermissibleValue(
        text="TermExpanson", description="Expand terms using a dictionary"
    )

    _defn = EnumDefinition(
        name="TransformationType",
        description="A controlled datamodels of the types of transformation that can be applied to",
    )


# Slots
# ``slots`` is an empty class used purely as a namespace: one ``Slot`` descriptor per
# class attribute is attached to it below, named ``<className>__<slot_name>``.
class slots:
    pass


# --- TextAnnotationConfiguration ---
slots.textAnnotationConfiguration__matches_whole_text = Slot(
    uri=ANN.matches_whole_text,
    name="textAnnotationConfiguration__matches_whole_text",
    curie=ANN.curie("matches_whole_text"),
    model_uri=ANN.textAnnotationConfiguration__matches_whole_text,
    domain=None,
    range=Optional[Union[bool, Bool]],
)

slots.textAnnotationConfiguration__sources = Slot(
    uri=ANN.sources,
    name="textAnnotationConfiguration__sources",
    curie=ANN.curie("sources"),
    model_uri=ANN.textAnnotationConfiguration__sources,
    domain=None,
    range=Optional[Union[str, List[str]]],
)

slots.textAnnotationConfiguration__limit = Slot(
    uri=ANN.limit,
    name="textAnnotationConfiguration__limit",
    curie=ANN.curie("limit"),
    model_uri=ANN.textAnnotationConfiguration__limit,
    domain=None,
    range=Optional[int],
)

slots.textAnnotationConfiguration__token_exclusion_list = Slot(
    uri=ANN.token_exclusion_list,
    name="textAnnotationConfiguration__token_exclusion_list",
    curie=ANN.curie("token_exclusion_list"),
    model_uri=ANN.textAnnotationConfiguration__token_exclusion_list,
    domain=None,
    range=Optional[Union[str, List[str]]],
)

slots.textAnnotationConfiguration__categories = Slot(
    uri=ANN.categories,
    name="textAnnotationConfiguration__categories",
    curie=ANN.curie("categories"),
    model_uri=ANN.textAnnotationConfiguration__categories,
    domain=None,
    range=Optional[Union[str, List[str]]],
)

slots.textAnnotationConfiguration__model = Slot(
    uri=ANN.model,
    name="textAnnotationConfiguration__model",
    curie=ANN.curie("model"),
    model_uri=ANN.textAnnotationConfiguration__model,
    domain=None,
    range=Optional[str],
)

slots.textAnnotationConfiguration__include_aliases = Slot(
    uri=ANN.include_aliases,
    name="textAnnotationConfiguration__include_aliases",
    curie=ANN.curie("include_aliases"),
    model_uri=ANN.textAnnotationConfiguration__include_aliases,
    domain=None,
    range=Optional[Union[bool, Bool]],
)

# --- TextAnnotationResultSet ---
slots.textAnnotationResultSet__annotations = Slot(
    uri=ANN.annotations,
    name="textAnnotationResultSet__annotations",
    curie=ANN.curie("annotations"),
    model_uri=ANN.textAnnotationResultSet__annotations,
    domain=None,
    range=Optional[Union[Union[dict, TextAnnotation], List[Union[dict, TextAnnotation]]]],
)

# --- TextualElement ---
slots.textualElement__id = Slot(
    uri=ANN.id,
    name="textualElement__id",
    curie=ANN.curie("id"),
    model_uri=ANN.textualElement__id,
    domain=None,
    range=URIRef,
)

slots.textualElement__text = Slot(
    uri=ANN.text,
    name="textualElement__text",
    curie=ANN.curie("text"),
    model_uri=ANN.textualElement__text,
    domain=None,
    range=Optional[str],
)

slots.textualElement__source_text = Slot(
    uri=ANN.source_text,
    name="textualElement__source_text",
    curie=ANN.curie("source_text"),
    model_uri=ANN.textualElement__source_text,
    domain=None,
    range=Optional[str],
)

slots.textualElement__parent_document = Slot(
    uri=ANN.parent_document,
    name="textualElement__parent_document",
    curie=ANN.curie("parent_document"),
    model_uri=ANN.textualElement__parent_document,
    domain=None,
    range=Optional[Union[str, URIorCURIE]],
)

# --- HasSpan ---
slots.hasSpan__subject_start = Slot(
    uri=ANN.subject_start,
    name="hasSpan__subject_start",
    curie=ANN.curie("subject_start"),
    model_uri=ANN.hasSpan__subject_start,
    domain=None,
    range=Optional[Union[int, Position]],
)

slots.hasSpan__subject_end = Slot(
    uri=ANN.subject_end,
    name="hasSpan__subject_end",
    curie=ANN.curie("subject_end"),
    model_uri=ANN.hasSpan__subject_end,
    domain=None,
    range=Optional[Union[int, Position]],
)

slots.hasSpan__subject_label = Slot(
    uri=ANN.subject_label,
    name="hasSpan__subject_label",
    curie=ANN.curie("subject_label"),
    model_uri=ANN.hasSpan__subject_label,
    domain=None,
    range=Optional[str],
)

# This slot's uri/curie come from the sssom namespace; its model_uri stays under ann.
slots.hasSpan__subject_source = Slot(
    uri=SSSOM.subject_source,
    name="hasSpan__subject_source",
    curie=SSSOM.curie("subject_source"),
    model_uri=ANN.hasSpan__subject_source,
    domain=None,
    range=Optional[str],
)

slots.hasSpan__subject_text_id = Slot(
    uri=ANN.subject_text_id,
    name="hasSpan__subject_text_id",
    curie=ANN.curie("subject_text_id"),
    model_uri=ANN.hasSpan__subject_text_id,
    domain=None,
    range=Optional[Union[str, TextualElementId]],
)

# --- TextAnnotation ---
# Several of these slots take their uri/curie from the sssom namespace; model_uri is always ann.
slots.textAnnotation__predicate_id = Slot(
    uri=SSSOM.predicate_id,
    name="textAnnotation__predicate_id",
    curie=SSSOM.curie("predicate_id"),
    model_uri=ANN.textAnnotation__predicate_id,
    domain=None,
    range=Optional[str],
)

slots.textAnnotation__object_id = Slot(
    uri=SSSOM.object_id,
    name="textAnnotation__object_id",
    curie=SSSOM.curie("object_id"),
    model_uri=ANN.textAnnotation__object_id,
    domain=None,
    range=Optional[str],
)

slots.textAnnotation__object_label = Slot(
    uri=SSSOM.object_label,
    name="textAnnotation__object_label",
    curie=SSSOM.curie("object_label"),
    model_uri=ANN.textAnnotation__object_label,
    domain=None,
    range=Optional[str],
)

slots.textAnnotation__object_categories = Slot(
    uri=ANN.object_categories,
    name="textAnnotation__object_categories",
    curie=ANN.curie("object_categories"),
    model_uri=ANN.textAnnotation__object_categories,
    domain=None,
    range=Optional[Union[str, List[str]]],
)

slots.textAnnotation__object_source = Slot(
    uri=SSSOM.object_source,
    name="textAnnotation__object_source",
    curie=SSSOM.curie("object_source"),
    model_uri=ANN.textAnnotation__object_source,
    domain=None,
    range=Optional[str],
)

slots.textAnnotation__confidence = Slot(
    uri=SSSOM.confidence,
    name="textAnnotation__confidence",
    curie=SSSOM.curie("confidence"),
    model_uri=ANN.textAnnotation__confidence,
    domain=None,
    range=Optional[float],
)

slots.textAnnotation__match_string = Slot(
    uri=SSSOM.match_string,
    name="textAnnotation__match_string",
    curie=SSSOM.curie("match_string"),
    model_uri=ANN.textAnnotation__match_string,
    domain=None,
    range=Optional[str],
)

slots.textAnnotation__is_longest_match = Slot(
    uri=ANN.is_longest_match,
    name="textAnnotation__is_longest_match",
    curie=ANN.curie("is_longest_match"),
    model_uri=ANN.textAnnotation__is_longest_match,
    domain=None,
    range=Optional[Union[bool, Bool]],
)

slots.textAnnotation__matches_whole_text = Slot(
    uri=ANN.matches_whole_text,
    name="textAnnotation__matches_whole_text",
    curie=ANN.curie("matches_whole_text"),
    model_uri=ANN.textAnnotation__matches_whole_text,
    domain=None,
    range=Optional[Union[bool, Bool]],
)

slots.textAnnotation__match_type = Slot(
    uri=ANN.match_type,
    name="textAnnotation__match_type",
    curie=ANN.curie("match_type"),
    model_uri=ANN.textAnnotation__match_type,
    domain=None,
    range=Optional[str],
)

slots.textAnnotation__info = Slot(
    uri=ANN.info,
    name="textAnnotation__info",
    curie=ANN.curie("info"),
    model_uri=ANN.textAnnotation__info,
    domain=None,
    range=Optional[str],
)

slots.textAnnotation__object_aliases = Slot(
    uri=ANN.object_aliases,
    name="textAnnotation__object_aliases",
    curie=ANN.curie("object_aliases"),
    model_uri=ANN.textAnnotation__object_aliases,
    domain=None,
    range=Optional[Union[str, List[str]]],
)