# Source code for oaklib.datamodels.text_annotator

# Auto generated from text_annotator.yaml by pythongen.py version: 0.0.1
# Generation date: 2023-09-16T18:49:46
# Schema: text-annotator
#
# id: https://w3id.org/oak/text_annotator
# description: A datamodel for representing the results of textual named entity recognition annotation. This draws upon both SSSOM and https://www.w3.org/TR/annotation-model/
# license: https://creativecommons.org/publicdomain/zero/1.0/

import dataclasses
import re
from dataclasses import dataclass
from typing import Any, ClassVar, Dict, List, Optional, Union

from jsonasobj2 import JsonObj, as_dict
from linkml_runtime.linkml_model.meta import (
    EnumDefinition,
    PermissibleValue,
    PvFormulaOptions,
)
from linkml_runtime.linkml_model.types import (
    Boolean,
    Float,
    Integer,
    String,
    Uriorcurie,
)
from linkml_runtime.utils.curienamespace import CurieNamespace
from linkml_runtime.utils.dataclass_extensions_376 import (
    dataclasses_init_fn_with_kwargs,
)
from linkml_runtime.utils.enumerations import EnumDefinitionImpl
from linkml_runtime.utils.formatutils import camelcase, sfx, underscore
from linkml_runtime.utils.metamodelcore import (
    Bool,
    URIorCURIE,
    bnode,
    empty_dict,
    empty_list,
)
from linkml_runtime.utils.slot import Slot
from linkml_runtime.utils.yamlutils import (
    YAMLRoot,
    extended_float,
    extended_int,
    extended_str,
)
from rdflib import Namespace, URIRef

# Version metadata emitted by the generator; `version` is None because no
# schema version value was written into this module.
metamodel_version = "1.7.0"
version = None

# Overwrite dataclasses _init_fn to add **kwargs in __init__
# NOTE: this replaces a private attribute of the stdlib `dataclasses` module at
# import time, so it affects the `dataclasses` module object process-wide, not
# only the classes in this file.
dataclasses._init_fn = dataclasses_init_fn_with_kwargs

# Namespaces
# Each CurieNamespace pairs a CURIE prefix with its URI base. In this module
# they are used via attribute access (e.g. `ANN.TextAnnotation`) and via
# `.curie(...)` (e.g. `ANN.curie("sources")`).
ANN = CurieNamespace("ann", "https://w3id.org/linkml/text_annotator/")
BPA = CurieNamespace("bpa", "https://bioportal.bioontology.org/annotator/")
LINKML = CurieNamespace("linkml", "https://w3id.org/linkml/")
OA = CurieNamespace("oa", "http://www.w3.org/ns/oa#")
OWL = CurieNamespace("owl", "http://www.w3.org/2002/07/owl#")
PAV = CurieNamespace("pav", "http://purl.org/pav/")
PROV = CurieNamespace("prov", "http://www.w3.org/ns/prov#")
RDF = CurieNamespace("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
RDFS = CurieNamespace("rdfs", "http://www.w3.org/2000/01/rdf-schema#")
SCHEMA = CurieNamespace("schema", "http://schema.org/")
SH = CurieNamespace("sh", "https://w3id.org/shacl/")
SKOS = CurieNamespace("skos", "http://www.w3.org/2004/02/skos/core#")
SSSOM = CurieNamespace("sssom", "http://w3id.org/sssom/")
XSD = CurieNamespace("xsd", "http://www.w3.org/2001/XMLSchema#")
# The `ann` namespace is the default one for this schema.
DEFAULT_ = ANN


# Types
class Position(Integer):
    """Integer-derived type named ``Position``, mapped to ``xsd:integer``."""

    type_name = "Position"
    type_class_curie = "xsd:integer"
    type_class_uri = XSD.integer
    type_model_uri = ANN.Position


# Class references
class TextualElementId(URIorCURIE):
    """Identifier type for ``TextualElement``; adds nothing to ``URIorCURIE``."""


@dataclass
class TextAnnotationConfiguration(YAMLRoot):
    """
    Configuration for search.

    ``__post_init__`` normalizes the slots after construction: the boolean
    slots are wrapped in ``Bool``, ``limit`` is coerced with ``int``, ``model``
    with ``str``, and the multivalued slots (``sources``,
    ``token_exclusion_list``, ``categories``) always end up as lists of ``str``.
    """

    _inherited_slots: ClassVar[List[str]] = []

    class_class_uri: ClassVar[URIRef] = ANN.TextAnnotationConfiguration
    class_class_curie: ClassVar[str] = "ann:TextAnnotationConfiguration"
    class_name: ClassVar[str] = "TextAnnotationConfiguration"
    class_model_uri: ClassVar[URIRef] = ANN.TextAnnotationConfiguration

    matches_whole_text: Optional[Union[bool, Bool]] = None
    sources: Optional[Union[str, List[str]]] = empty_list()
    limit: Optional[int] = None
    token_exclusion_list: Optional[Union[str, List[str]]] = empty_list()
    categories: Optional[Union[str, List[str]]] = empty_list()
    model: Optional[str] = None
    include_aliases: Optional[Union[bool, Bool]] = None

    def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
        """Coerce each slot to its declared range, then defer to the parent class.

        Positional extras are discarded; keyword extras are forwarded to
        ``super().__post_init__``.
        """
        whole_text = self.matches_whole_text
        if not (whole_text is None or isinstance(whole_text, Bool)):
            self.matches_whole_text = Bool(whole_text)

        # Multivalued slot: None -> [], scalar -> [scalar], elements -> str.
        source_items = self.sources
        if source_items is None:
            source_items = []
        elif not isinstance(source_items, list):
            source_items = [source_items]
        self.sources = [item if isinstance(item, str) else str(item) for item in source_items]

        max_hits = self.limit
        if not (max_hits is None or isinstance(max_hits, int)):
            self.limit = int(max_hits)

        excluded_tokens = self.token_exclusion_list
        if excluded_tokens is None:
            excluded_tokens = []
        elif not isinstance(excluded_tokens, list):
            excluded_tokens = [excluded_tokens]
        self.token_exclusion_list = [
            item if isinstance(item, str) else str(item) for item in excluded_tokens
        ]

        category_items = self.categories
        if category_items is None:
            category_items = []
        elif not isinstance(category_items, list):
            category_items = [category_items]
        self.categories = [
            item if isinstance(item, str) else str(item) for item in category_items
        ]

        model_name = self.model
        if not (model_name is None or isinstance(model_name, str)):
            self.model = str(model_name)

        with_aliases = self.include_aliases
        if not (with_aliases is None or isinstance(with_aliases, Bool)):
            self.include_aliases = Bool(with_aliases)

        super().__post_init__(**kwargs)


@dataclass
class TextAnnotationResultSet(YAMLRoot):
    """
    A collection of annotation results.

    The single ``annotations`` slot is normalized in ``__post_init__`` to a
    list whose elements are all ``TextAnnotation`` instances.
    """

    _inherited_slots: ClassVar[List[str]] = []

    class_class_uri: ClassVar[URIRef] = ANN.TextAnnotationResultSet
    class_class_curie: ClassVar[str] = "ann:TextAnnotationResultSet"
    class_name: ClassVar[str] = "TextAnnotationResultSet"
    class_model_uri: ClassVar[URIRef] = ANN.TextAnnotationResultSet

    annotations: Optional[
        Union[Union[dict, "TextAnnotation"], List[Union[dict, "TextAnnotation"]]]
    ] = empty_list()

    def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
        """Normalize ``annotations`` to a list of ``TextAnnotation`` objects.

        ``None`` becomes an empty list and a single non-list value is wrapped
        in a list. Elements that are not already ``TextAnnotation`` are rebuilt
        via ``TextAnnotation(**as_dict(element))``. Positional extras are
        discarded; keyword extras go to ``super().__post_init__``.
        """
        raw = self.annotations
        if raw is None:
            entries = []
        elif isinstance(raw, list):
            entries = raw
        else:
            entries = [raw]

        self.annotations = [
            entry if isinstance(entry, TextAnnotation) else TextAnnotation(**as_dict(entry))
            for entry in entries
        ]

        super().__post_init__(**kwargs)


@dataclass
class TextualElement(YAMLRoot):
    """
    A textual element identified by ``id``.

    ``id`` is checked for emptiness and coerced to ``TextualElementId``; the
    optional ``text`` and ``source_text`` slots are coerced to ``str`` and
    ``parent_document`` to ``URIorCURIE``.
    """

    _inherited_slots: ClassVar[List[str]] = []

    class_class_uri: ClassVar[URIRef] = ANN.TextualElement
    class_class_curie: ClassVar[str] = "ann:TextualElement"
    class_name: ClassVar[str] = "TextualElement"
    class_model_uri: ClassVar[URIRef] = ANN.TextualElement

    id: Union[str, TextualElementId] = None
    text: Optional[str] = None
    source_text: Optional[str] = None
    parent_document: Optional[Union[str, URIorCURIE]] = None

    def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
        """Validate ``id`` and coerce each slot to its declared range.

        ``self.MissingRequiredField("id")`` is invoked when ``self._is_empty``
        reports the identifier as empty. Positional extras are discarded;
        keyword extras go to ``super().__post_init__``.
        """
        identifier = self.id
        if self._is_empty(identifier):
            self.MissingRequiredField("id")
        if not isinstance(identifier, TextualElementId):
            self.id = TextualElementId(identifier)

        body = self.text
        if not (body is None or isinstance(body, str)):
            self.text = str(body)

        origin_text = self.source_text
        if not (origin_text is None or isinstance(origin_text, str)):
            self.source_text = str(origin_text)

        parent = self.parent_document
        if not (parent is None or isinstance(parent, URIorCURIE)):
            self.parent_document = URIorCURIE(parent)

        super().__post_init__(**kwargs)


@dataclass
class HasSpan(YAMLRoot):
    """
    Holder of the ``subject_*`` slots: start, end, label, source and text id.

    ``__post_init__`` coerces ``subject_start`` / ``subject_end`` to
    ``Position``, ``subject_label`` / ``subject_source`` to ``str`` and
    ``subject_text_id`` to ``TextualElementId``; ``None`` values are left alone.
    """

    _inherited_slots: ClassVar[List[str]] = []

    class_class_uri: ClassVar[URIRef] = ANN.HasSpan
    class_class_curie: ClassVar[str] = "ann:HasSpan"
    class_name: ClassVar[str] = "HasSpan"
    class_model_uri: ClassVar[URIRef] = ANN.HasSpan

    subject_start: Optional[Union[int, Position]] = None
    subject_end: Optional[Union[int, Position]] = None
    subject_label: Optional[str] = None
    subject_source: Optional[str] = None
    subject_text_id: Optional[Union[str, TextualElementId]] = None

    def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
        """Coerce each populated slot to its declared range.

        Positional extras are discarded; keyword extras go to
        ``super().__post_init__``.
        """
        start = self.subject_start
        if not (start is None or isinstance(start, Position)):
            self.subject_start = Position(start)

        end = self.subject_end
        if not (end is None or isinstance(end, Position)):
            self.subject_end = Position(end)

        label = self.subject_label
        if not (label is None or isinstance(label, str)):
            self.subject_label = str(label)

        source = self.subject_source
        if not (source is None or isinstance(source, str)):
            self.subject_source = str(source)

        text_id = self.subject_text_id
        if not (text_id is None or isinstance(text_id, TextualElementId)):
            self.subject_text_id = TextualElementId(text_id)

        super().__post_init__(**kwargs)



# [docs]  (Sphinx viewcode link marker from the rendered page; not part of the module source)
@dataclass
class TextAnnotation(YAMLRoot):
    """
    An individual text annotation.

    The class URI is ``oa:Annotation``. Besides its own predicate/object/match
    slots, the class declares the same five ``subject_*`` slots as ``HasSpan``.
    ``__post_init__`` coerces every populated slot to its declared range and
    turns the multivalued slots (``object_categories``, ``object_aliases``)
    into lists of ``str``.
    """

    _inherited_slots: ClassVar[List[str]] = []

    class_class_uri: ClassVar[URIRef] = OA.Annotation
    class_class_curie: ClassVar[str] = "oa:Annotation"
    class_name: ClassVar[str] = "TextAnnotation"
    class_model_uri: ClassVar[URIRef] = ANN.TextAnnotation

    predicate_id: Optional[str] = None
    object_id: Optional[str] = None
    object_label: Optional[str] = None
    object_categories: Optional[Union[str, List[str]]] = empty_list()
    object_source: Optional[str] = None
    confidence: Optional[float] = None
    match_string: Optional[str] = None
    is_longest_match: Optional[Union[bool, Bool]] = None
    matches_whole_text: Optional[Union[bool, Bool]] = None
    match_type: Optional[str] = None
    info: Optional[str] = None
    object_aliases: Optional[Union[str, List[str]]] = empty_list()
    subject_start: Optional[Union[int, Position]] = None
    subject_end: Optional[Union[int, Position]] = None
    subject_label: Optional[str] = None
    subject_source: Optional[str] = None
    subject_text_id: Optional[Union[str, TextualElementId]] = None

    def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
        """Coerce each slot to its declared range, in declaration order.

        Positional extras are discarded; keyword extras go to
        ``super().__post_init__``.
        """
        predicate = self.predicate_id
        if not (predicate is None or isinstance(predicate, str)):
            self.predicate_id = str(predicate)

        obj_id = self.object_id
        if not (obj_id is None or isinstance(obj_id, str)):
            self.object_id = str(obj_id)

        obj_label = self.object_label
        if not (obj_label is None or isinstance(obj_label, str)):
            self.object_label = str(obj_label)

        # Multivalued slot: None -> [], scalar -> [scalar], elements -> str.
        category_items = self.object_categories
        if category_items is None:
            category_items = []
        elif not isinstance(category_items, list):
            category_items = [category_items]
        self.object_categories = [
            item if isinstance(item, str) else str(item) for item in category_items
        ]

        obj_source = self.object_source
        if not (obj_source is None or isinstance(obj_source, str)):
            self.object_source = str(obj_source)

        score = self.confidence
        if not (score is None or isinstance(score, float)):
            self.confidence = float(score)

        matched = self.match_string
        if not (matched is None or isinstance(matched, str)):
            self.match_string = str(matched)

        longest = self.is_longest_match
        if not (longest is None or isinstance(longest, Bool)):
            self.is_longest_match = Bool(longest)

        whole_text = self.matches_whole_text
        if not (whole_text is None or isinstance(whole_text, Bool)):
            self.matches_whole_text = Bool(whole_text)

        kind = self.match_type
        if not (kind is None or isinstance(kind, str)):
            self.match_type = str(kind)

        details = self.info
        if not (details is None or isinstance(details, str)):
            self.info = str(details)

        alias_items = self.object_aliases
        if alias_items is None:
            alias_items = []
        elif not isinstance(alias_items, list):
            alias_items = [alias_items]
        self.object_aliases = [
            item if isinstance(item, str) else str(item) for item in alias_items
        ]

        start = self.subject_start
        if not (start is None or isinstance(start, Position)):
            self.subject_start = Position(start)

        end = self.subject_end
        if not (end is None or isinstance(end, Position)):
            self.subject_end = Position(end)

        subj_label = self.subject_label
        if not (subj_label is None or isinstance(subj_label, str)):
            self.subject_label = str(subj_label)

        subj_source = self.subject_source
        if not (subj_source is None or isinstance(subj_source, str)):
            self.subject_source = str(subj_source)

        text_id = self.subject_text_id
        if not (text_id is None or isinstance(text_id, TextualElementId)):
            self.subject_text_id = TextualElementId(text_id)

        super().__post_init__(**kwargs)



# Enumerations
class TransformationType(EnumDefinitionImpl):
    """
    A controlled vocabulary of the types of transformation that can be applied
    to a term (each permissible value below describes one transformation).

    NOTE(review): the schema description stored in ``_defn`` ends abruptly
    ("...that can be applied to"); it is left untouched here because it is
    runtime data emitted by the generator.
    """

    Stemming = PermissibleValue(
        text="Stemming",
        description="Removal of the last few characters of a word to yield a stem term for each word in the term",
    )
    Lemmatization = PermissibleValue(
        text="Lemmatization",
        description="Contextual reduction of a word to its base form for each word in the term",
    )
    WordOrderNormalization = PermissibleValue(
        text="WordOrderNormalization",
        description="reorder words in the term to a standard order such that comparisons are order-independent",
    )
    Depluralization = PermissibleValue(
        text="Depluralization",
        description="Transform plural form to singular form for each word in a term",
    )
    CaseNormalization = PermissibleValue(
        text="CaseNormalization",
        description="Transform term to a standard case, typically lowercase",
    )
    WhitespaceNormalization = PermissibleValue(
        text="WhitespaceNormalization",
        description="Trim whitespace, condense whitespace runs, and transform all non-space whitespace to spaces",
    )
    # NOTE(review): "TermExpanson" looks like a misspelling of "TermExpansion".
    # Both the attribute name and the value text are part of the enum's public
    # surface, so any correction presumably belongs in the source schema
    # (text_annotator.yaml) followed by regeneration -- confirm before renaming.
    TermExpanson = PermissibleValue(
        text="TermExpanson", description="Expand terms using a dictionary"
    )

    # Enum-level metadata (name and description) for this enumeration.
    _defn = EnumDefinition(
        name="TransformationType",
        description="A controlled datamodels of the types of transformation that can be applied to",
    )


# Slots
class slots:
    """Empty namespace class; slot descriptors are attached as class attributes after definition."""


# Slot descriptors for the attributes of TextAnnotationConfiguration.
# Every descriptor here takes its uri, curie and model_uri from the ANN
# namespace, has no domain (domain=None), and repeats the range declared on
# the corresponding dataclass field.
slots.textAnnotationConfiguration__matches_whole_text = Slot(
    uri=ANN.matches_whole_text,
    name="textAnnotationConfiguration__matches_whole_text",
    curie=ANN.curie("matches_whole_text"),
    model_uri=ANN.textAnnotationConfiguration__matches_whole_text,
    domain=None,
    range=Optional[Union[bool, Bool]],
)

slots.textAnnotationConfiguration__sources = Slot(
    uri=ANN.sources,
    name="textAnnotationConfiguration__sources",
    curie=ANN.curie("sources"),
    model_uri=ANN.textAnnotationConfiguration__sources,
    domain=None,
    range=Optional[Union[str, List[str]]],
)

slots.textAnnotationConfiguration__limit = Slot(
    uri=ANN.limit,
    name="textAnnotationConfiguration__limit",
    curie=ANN.curie("limit"),
    model_uri=ANN.textAnnotationConfiguration__limit,
    domain=None,
    range=Optional[int],
)

slots.textAnnotationConfiguration__token_exclusion_list = Slot(
    uri=ANN.token_exclusion_list,
    name="textAnnotationConfiguration__token_exclusion_list",
    curie=ANN.curie("token_exclusion_list"),
    model_uri=ANN.textAnnotationConfiguration__token_exclusion_list,
    domain=None,
    range=Optional[Union[str, List[str]]],
)

slots.textAnnotationConfiguration__categories = Slot(
    uri=ANN.categories,
    name="textAnnotationConfiguration__categories",
    curie=ANN.curie("categories"),
    model_uri=ANN.textAnnotationConfiguration__categories,
    domain=None,
    range=Optional[Union[str, List[str]]],
)

slots.textAnnotationConfiguration__model = Slot(
    uri=ANN.model,
    name="textAnnotationConfiguration__model",
    curie=ANN.curie("model"),
    model_uri=ANN.textAnnotationConfiguration__model,
    domain=None,
    range=Optional[str],
)

slots.textAnnotationConfiguration__include_aliases = Slot(
    uri=ANN.include_aliases,
    name="textAnnotationConfiguration__include_aliases",
    curie=ANN.curie("include_aliases"),
    model_uri=ANN.textAnnotationConfiguration__include_aliases,
    domain=None,
    range=Optional[Union[bool, Bool]],
)

# Slot descriptor for TextAnnotationResultSet.annotations: a single
# TextAnnotation (or dict) or a list of them, identified in the ANN namespace.
slots.textAnnotationResultSet__annotations = Slot(
    uri=ANN.annotations,
    name="textAnnotationResultSet__annotations",
    curie=ANN.curie("annotations"),
    model_uri=ANN.textAnnotationResultSet__annotations,
    domain=None,
    range=Optional[Union[Union[dict, TextAnnotation], List[Union[dict, TextAnnotation]]]],
)

# Slot descriptors for the attributes of TextualElement, all identified in
# the ANN namespace with domain=None.
# NOTE(review): `textualElement__id` declares range=URIRef, whereas the
# dataclass field is annotated Union[str, TextualElementId]; this is what the
# generator emitted -- confirm it is intended before relying on the range.
slots.textualElement__id = Slot(
    uri=ANN.id,
    name="textualElement__id",
    curie=ANN.curie("id"),
    model_uri=ANN.textualElement__id,
    domain=None,
    range=URIRef,
)

slots.textualElement__text = Slot(
    uri=ANN.text,
    name="textualElement__text",
    curie=ANN.curie("text"),
    model_uri=ANN.textualElement__text,
    domain=None,
    range=Optional[str],
)

slots.textualElement__source_text = Slot(
    uri=ANN.source_text,
    name="textualElement__source_text",
    curie=ANN.curie("source_text"),
    model_uri=ANN.textualElement__source_text,
    domain=None,
    range=Optional[str],
)

slots.textualElement__parent_document = Slot(
    uri=ANN.parent_document,
    name="textualElement__parent_document",
    curie=ANN.curie("parent_document"),
    model_uri=ANN.textualElement__parent_document,
    domain=None,
    range=Optional[Union[str, URIorCURIE]],
)

# Slot descriptors for the attributes of HasSpan. All use the ANN namespace
# for uri/curie except `subject_source`, whose uri and curie come from SSSOM;
# every model_uri is in ANN and every domain is None.
slots.hasSpan__subject_start = Slot(
    uri=ANN.subject_start,
    name="hasSpan__subject_start",
    curie=ANN.curie("subject_start"),
    model_uri=ANN.hasSpan__subject_start,
    domain=None,
    range=Optional[Union[int, Position]],
)

slots.hasSpan__subject_end = Slot(
    uri=ANN.subject_end,
    name="hasSpan__subject_end",
    curie=ANN.curie("subject_end"),
    model_uri=ANN.hasSpan__subject_end,
    domain=None,
    range=Optional[Union[int, Position]],
)

slots.hasSpan__subject_label = Slot(
    uri=ANN.subject_label,
    name="hasSpan__subject_label",
    curie=ANN.curie("subject_label"),
    model_uri=ANN.hasSpan__subject_label,
    domain=None,
    range=Optional[str],
)

# uri/curie taken from the SSSOM namespace rather than ANN.
slots.hasSpan__subject_source = Slot(
    uri=SSSOM.subject_source,
    name="hasSpan__subject_source",
    curie=SSSOM.curie("subject_source"),
    model_uri=ANN.hasSpan__subject_source,
    domain=None,
    range=Optional[str],
)

slots.hasSpan__subject_text_id = Slot(
    uri=ANN.subject_text_id,
    name="hasSpan__subject_text_id",
    curie=ANN.curie("subject_text_id"),
    model_uri=ANN.hasSpan__subject_text_id,
    domain=None,
    range=Optional[Union[str, TextualElementId]],
)

# Slot descriptors for the attributes declared directly on TextAnnotation.
# predicate_id, object_id, object_label, object_source, confidence and
# match_string take their uri/curie from the SSSOM namespace; the remaining
# slots use ANN. Every model_uri is in ANN and every domain is None.
# No `textAnnotation__subject_*` descriptors are defined in this section;
# presumably the `hasSpan__subject_*` descriptors cover those fields -- verify
# against the schema.
slots.textAnnotation__predicate_id = Slot(
    uri=SSSOM.predicate_id,
    name="textAnnotation__predicate_id",
    curie=SSSOM.curie("predicate_id"),
    model_uri=ANN.textAnnotation__predicate_id,
    domain=None,
    range=Optional[str],
)

slots.textAnnotation__object_id = Slot(
    uri=SSSOM.object_id,
    name="textAnnotation__object_id",
    curie=SSSOM.curie("object_id"),
    model_uri=ANN.textAnnotation__object_id,
    domain=None,
    range=Optional[str],
)

slots.textAnnotation__object_label = Slot(
    uri=SSSOM.object_label,
    name="textAnnotation__object_label",
    curie=SSSOM.curie("object_label"),
    model_uri=ANN.textAnnotation__object_label,
    domain=None,
    range=Optional[str],
)

slots.textAnnotation__object_categories = Slot(
    uri=ANN.object_categories,
    name="textAnnotation__object_categories",
    curie=ANN.curie("object_categories"),
    model_uri=ANN.textAnnotation__object_categories,
    domain=None,
    range=Optional[Union[str, List[str]]],
)

slots.textAnnotation__object_source = Slot(
    uri=SSSOM.object_source,
    name="textAnnotation__object_source",
    curie=SSSOM.curie("object_source"),
    model_uri=ANN.textAnnotation__object_source,
    domain=None,
    range=Optional[str],
)

slots.textAnnotation__confidence = Slot(
    uri=SSSOM.confidence,
    name="textAnnotation__confidence",
    curie=SSSOM.curie("confidence"),
    model_uri=ANN.textAnnotation__confidence,
    domain=None,
    range=Optional[float],
)

slots.textAnnotation__match_string = Slot(
    uri=SSSOM.match_string,
    name="textAnnotation__match_string",
    curie=SSSOM.curie("match_string"),
    model_uri=ANN.textAnnotation__match_string,
    domain=None,
    range=Optional[str],
)

slots.textAnnotation__is_longest_match = Slot(
    uri=ANN.is_longest_match,
    name="textAnnotation__is_longest_match",
    curie=ANN.curie("is_longest_match"),
    model_uri=ANN.textAnnotation__is_longest_match,
    domain=None,
    range=Optional[Union[bool, Bool]],
)

slots.textAnnotation__matches_whole_text = Slot(
    uri=ANN.matches_whole_text,
    name="textAnnotation__matches_whole_text",
    curie=ANN.curie("matches_whole_text"),
    model_uri=ANN.textAnnotation__matches_whole_text,
    domain=None,
    range=Optional[Union[bool, Bool]],
)

slots.textAnnotation__match_type = Slot(
    uri=ANN.match_type,
    name="textAnnotation__match_type",
    curie=ANN.curie("match_type"),
    model_uri=ANN.textAnnotation__match_type,
    domain=None,
    range=Optional[str],
)

slots.textAnnotation__info = Slot(
    uri=ANN.info,
    name="textAnnotation__info",
    curie=ANN.curie("info"),
    model_uri=ANN.textAnnotation__info,
    domain=None,
    range=Optional[str],
)

slots.textAnnotation__object_aliases = Slot(
    uri=ANN.object_aliases,
    name="textAnnotation__object_aliases",
    curie=ANN.curie("object_aliases"),
    model_uri=ANN.textAnnotation__object_aliases,
    domain=None,
    range=Optional[Union[str, List[str]]],
)