# Source code for dartfx.rdf.pydantic._base

"""Pydantic RDF Base Model - Bridge between Pydantic models and RDF graphs.

This module provides a base class and utilities for seamlessly converting Pydantic models
to and from RDF (Resource Description Framework) graphs using rdflib. It enables type-safe,
validated RDF data modeling with automatic serialization and deserialization.

The core components are:

- :class:`RdfBaseModel`: A Pydantic BaseModel subclass that provides RDF serialization
  and deserialization capabilities. Models inheriting from this class can be automatically
  converted to/from RDF graphs in various formats (Turtle, RDF/XML, JSON-LD, etc.).

- :class:`RdfProperty`: A metadata descriptor used in type annotations to map Pydantic
  fields to RDF predicates, with optional datatype and language specifications.

Basic Usage
-----------

Define a model by inheriting from RdfBaseModel and annotating fields with RdfProperty::

    from typing import Annotated, Optional, List
    from rdflib import Namespace, URIRef
    from dartfx.rdf.pydantic import RdfBaseModel, RdfProperty

    FOAF = Namespace("http://xmlns.com/foaf/0.1/")

    class Person(RdfBaseModel):
        rdf_type: str = str(FOAF.Person)
        rdf_namespace = FOAF
        rdf_prefixes = {"foaf": FOAF}

        name: Annotated[Optional[List[str]], RdfProperty(FOAF.name)] = None
        email: Annotated[Optional[List[str]], RdfProperty(FOAF.mbox)] = None
        knows: Annotated[Optional[List[URIRef | Person]], RdfProperty(FOAF.knows)] = None

Serialize to RDF::

    person = Person(name=["Alice"], email=["alice@example.org"])
    turtle = person.to_rdf("turtle")
    # Output: Turtle format RDF with proper namespace bindings

Deserialize from RDF::

    restored = Person.from_rdf(turtle, format="turtle")
    assert restored.name == ["Alice"]

Key Features
------------

- **Type Safety**: Full Pydantic validation for RDF data
- **Multiple Formats**: Serialize to Turtle, RDF/XML, JSON-LD, N-Triples, etc.
- **Round-trip Support**: Lossless conversion between Python objects and RDF
- **Nested Objects**: Support for nested RdfBaseModel instances
- **List Values**: Automatic handling of multi-valued properties
- **Custom Datatypes**: Specify XSD datatypes and language tags
- **Namespace Management**: Automatic prefix binding for clean serialization
- **Flexible Identifiers**: Use custom ID fields or auto-generate UUIDs

Advanced Features
-----------------

Custom serializers and parsers::

    def serialize_date(d: date) -> str:
        return d.isoformat()

    def parse_date(node: Literal) -> date:
        return date.fromisoformat(str(node))

    birth_date: Annotated[Optional[date], RdfProperty(
        SCHEMA.birthDate,
        serializer=serialize_date,
        parser=parse_date
    )] = None

Language-tagged literals::

    description: Annotated[Optional[List[str]], RdfProperty(
        DC.description,
        language="en"
    )] = None

Custom datatypes::

    age: Annotated[Optional[int], RdfProperty(
        FOAF.age,
        datatype=XSD.integer
    )] = None

Examples
--------

Simple metadata example::

    from rdflib import Namespace, DCTERMS

    class Document(RdfBaseModel):
        rdf_namespace = DCTERMS
        rdf_prefixes = {"dcterms": DCTERMS}

        title: Annotated[Optional[List[str]], RdfProperty(DCTERMS.title)] = None
        creator: Annotated[Optional[List[str]], RdfProperty(DCTERMS.creator)] = None

    doc = Document(title=["My Document"], creator=["John Doe"])
    print(doc.to_rdf("turtle"))

Nested objects example::

    class Organization(RdfBaseModel):
        rdf_type: str = str(FOAF.Organization)
        name: Annotated[Optional[List[str]], RdfProperty(FOAF.name)] = None

    class Person(RdfBaseModel):
        rdf_type: str = str(FOAF.Person)
        name: Annotated[Optional[List[str]], RdfProperty(FOAF.name)] = None
        works_for: Annotated[Optional[List[Organization]], RdfProperty(FOAF.workplaceHomepage)] = None

    org = Organization(name=["ACME Corp"])
    person = Person(name=["Alice"], works_for=[org])
    # Both person and organization are serialized to the graph

Notes
-----

- Field names don't need to match RDF predicate names; use ``RdfProperty`` to map them
- Use ``Optional[List[T]]`` for multi-valued properties (standard in RDF)
- The ``id`` field is special and maps to the RDF subject URI
- A custom ``rdf_id_field`` can be specified per model
- Auto-generated UUIDs are used when no ID is provided
- Namespace prefixes improve the readability of serialized output

See Also
--------

- rdflib documentation: https://rdflib.readthedocs.io/
- Pydantic documentation: https://docs.pydantic.dev/
- RDF Primer: https://www.w3.org/TR/rdf11-primer/
"""

from __future__ import annotations

import re
import types
import uuid
from collections.abc import Iterable
from dataclasses import dataclass
from datetime import date, datetime, time
from decimal import Decimal
from enum import Enum
from typing import (
    TYPE_CHECKING,
    Annotated,
    Any,
    ClassVar,
    Protocol,
    TypeVar,
    Union,
    cast,
    get_args,
    get_origin,
    runtime_checkable,
)

from pydantic import BaseModel, BeforeValidator, ConfigDict, Field
from rdflib import RDF, XSD, BNode, Graph, Literal, Namespace, URIRef

# Type variable bound to RdfBaseModel; used below as ``cls: type[T]`` -> ``T``
# so that classmethod constructors are typed as returning the calling subclass.
T = TypeVar("T", bound="RdfBaseModel")


class LangString(BaseModel):
    """A string literal paired with an optional language tag.

    Models an RDF language-tagged literal as a small Pydantic model so that
    localized text can be validated like any other field.

    Attributes
    ----------
    value : str
        The text of the literal.
    lang : str | None, optional
        The language tag (for example ``"en"`` or ``"fr"``). ``None`` (the
        default) means the string is untagged.
    """

    value: str
    lang: str | None = None

    def __str__(self) -> str:
        """Return only the text, without the language tag."""
        return self.value

    def __repr__(self) -> str:
        """Return ``"text"@lang`` when a tag is set, otherwise ``"text"``."""
        quoted = f'"{self.value}"'
        return f"{quoted}@{self.lang}" if self.lang else quoted

    def __eq__(self, other: Any) -> bool:
        """Compare by ``(value, lang)`` against another ``LangString``.

        Any other operand is handed to the base-class comparison.
        """
        if not isinstance(other, LangString):
            return super().__eq__(other)
        return (self.value, self.lang) == (other.value, other.lang)

    def __hash__(self) -> int:
        """Hash on the same ``(value, lang)`` pair that ``__eq__`` compares."""
        return hash((self.value, self.lang))
# ---------------------------------------------------------------------------
# Input types accepted by LocalizedStr coercion
# ---------------------------------------------------------------------------

LocalizedStrInput = (
    str
    | LangString
    | list["str | LangString | dict[str, str | list[str]]"]
    | dict[str, str | list[str]]
)


def _normalise_into(
    value: Any,
    acc: list[LangString],
) -> None:
    """Recursively normalise *value* and append ``LangString`` items to *acc*.

    Parameters
    ----------
    value : Any
        The input to normalise. Handled shapes, checked in this order:

        * ``LangString`` - appended unchanged.
        * ``str`` - appended as an untagged ``LangString``.
        * ``dict`` - each key is a language tag (a falsy key such as ``""``
          becomes ``lang=None``); each value is stringified, and a ``list``
          or ``tuple`` value contributes one entry per element.
        * any other iterable (``list``, ``tuple``, ``set``, generator, ...)
          - every element is normalised recursively.
        * anything else - stringified into a single untagged entry.
    acc : list[LangString]
        Output accumulator, mutated in place.

    Notes
    -----
    ``bytes``/``bytearray`` and Pydantic models are iterable but are not
    collections of strings, so they are deliberately stringified as a whole
    instead of being flattened. Iterating a ``set`` yields its elements in
    arbitrary order, so the resulting entry order is not deterministic.
    """
    if isinstance(value, LangString):
        acc.append(value)
    elif isinstance(value, str):
        acc.append(LangString(value=value, lang=None))
    elif isinstance(value, dict):
        for lang_key, val in value.items():
            lang = lang_key or None  # "" → None
            # A tuple of synonyms is treated like a list instead of being
            # collapsed into the tuple's repr.
            members = val if isinstance(val, (list, tuple)) else (val,)
            for member in members:
                acc.append(LangString(value=str(member), lang=lang))
    elif isinstance(value, Iterable) and not isinstance(value, (bytes, bytearray, BaseModel)):
        # Covers list as before, plus tuple / set / generator inputs that
        # previously fell through to the stringify branch below.
        for item in value:
            _normalise_into(item, acc)
    else:
        # Last-resort: stringify
        acc.append(LangString(value=str(value), lang=None))


def _deduplicate_lang_strings(items: list[LangString]) -> list[LangString]:
    """Remove duplicate ``(value, lang)`` pairs, preserving order.

    Parameters
    ----------
    items : list[LangString]
        Entries to filter; the input list is not modified.

    Returns
    -------
    list[LangString]
        A new list holding the first occurrence of every ``(value, lang)``
        pair, in the original order.
    """
    seen: set[tuple[str, str | None]] = set()
    result: list[LangString] = []
    for ls in items:
        key = (ls.value, ls.lang)
        if key not in seen:
            seen.add(key)
            result.append(ls)
    return result
class LangStringList(list[LangString]):
    """A ``list[LangString]`` subclass with convenience query methods.

    ``append``, ``insert``, ``extend``, ``+=`` and ``+`` silently skip entries
    whose ``(value, lang)`` pair is already present.

    ``extend``, ``+=``, ``+``, ``-=`` and ``-`` also coerce flexible inputs
    (``str``, ``dict``, ``LangString`` and lists of those) into ``LangString``
    objects. ``append`` and ``insert`` expect a ready-made ``LangString``.

    The constructor is the plain ``list`` constructor: it neither coerces nor
    deduplicates its argument.

    Examples
    --------
    ::

        from dartfx.rdf.pydantic import LangString, LangStringList

        ls = LangStringList([
            LangString(value="World", lang="en"),
            LangString(value="Mundo", lang="es"),
        ])
        ls += LangString(value="Welt", lang="de")

        ls.has_language("en")       # True
        ls.count_by_lang("en")      # 1
        ls.languages()              # {"en", "es", "de"}
        ls.has_synonyms("en")       # False
    """

    # -- internal helpers ---------------------------------------------------

    @staticmethod
    def _norm_lang(lang: str | None) -> str | None:
        """Normalise ``""`` to ``None`` for language tags."""
        return None if lang == "" else lang

    def _keys(self) -> set[tuple[str, str | None]]:
        """Return the set of ``(value, lang)`` pairs currently stored."""
        return {(ls.value, ls.lang) for ls in self}

    def _add_if_new(self, item: LangString) -> None:
        """Append *item* unless its ``(value, lang)`` pair is already stored."""
        if (item.value, item.lang) not in self._keys():
            super().append(item)

    def _add_all(self, items: Iterable[LangString]) -> None:
        """Append every not-yet-present entry of *items*, in order.

        The key set is built once and kept up to date while adding, so a bulk
        add does not rescan the whole list for every new entry.
        """
        seen = self._keys()
        for item in items:
            key = (item.value, item.lang)
            if key not in seen:
                seen.add(key)
                super().append(item)

    def untagged(self) -> LangStringList:
        """Return entries whose language tag is ``None``."""
        return LangStringList(ls for ls in self if ls.lang is None)

    # -- list overrides (uniqueness-preserving) -----------------------------

    def append(self, item: LangString) -> None:
        """Append *item*, silently skipping if ``(value, lang)`` already exists."""
        self._add_if_new(item)

    def extend(self, items: Any) -> None:
        """Extend with *items*, coercing flexible inputs and deduplicating."""
        normalised: list[LangString] = []
        _normalise_into(items, normalised)
        self._add_all(normalised)

    def insert(self, index: int, item: LangString) -> None:  # type: ignore[override]
        """Insert *item* at *index* if ``(value, lang)`` is not already present."""
        if (item.value, item.lang) not in self._keys():
            super().insert(index, item)

    def __iadd__(self, other: Any) -> LangStringList:  # type: ignore[override]
        """Support ``ls += LangString(...)`` and ``ls += [...]``."""
        # _normalise_into already handles a bare LangString, a list and every
        # other accepted shape, so no per-type dispatch is needed here.
        normalised: list[LangString] = []
        _normalise_into(other, normalised)
        self._add_all(normalised)
        return self

    def __add__(self, other: Any) -> LangStringList:  # type: ignore[override]
        """Return a new ``LangStringList`` with additional entries."""
        result = LangStringList(self)
        result += other
        return result

    # -- subtraction (removal) ----------------------------------------------

    def __isub__(self, other: Any) -> LangStringList:
        """Support ``ls -= LangString(...)`` and ``ls -= [...]``.

        Removes matching ``(value, lang)`` entries.
        """
        to_remove: list[LangString] = []
        _normalise_into(other, to_remove)
        keys_to_remove = {(ls.value, ls.lang) for ls in to_remove}
        # Slice assignment replaces the contents in place, keeping identity.
        self[:] = [ls for ls in self if (ls.value, ls.lang) not in keys_to_remove]
        return self

    def __sub__(self, other: Any) -> LangStringList:
        """Return a new ``LangStringList`` with matching entries removed."""
        result = LangStringList(self)
        result -= other
        return result

    # -- str-like behaviour -------------------------------------------------

    def __str__(self) -> str:
        """Return the plain string value when unambiguous.

        * If there is exactly **one** entry → its value.
        * If there are multiple entries but exactly **one** untagged
          (``lang=None``) entry → that entry's value.
        * Otherwise → the default list representation.
        """
        if len(self) == 1:
            return self[0].value
        untagged = self.untagged()
        if len(untagged) == 1:
            return untagged[0].value
        return super().__repr__()

    def __eq__(self, other: object) -> bool:
        """Allow comparison with ``str`` when str-like behaviour applies.

        * ``pref_label == "Hello"`` is ``True`` when there is exactly one
          entry with ``value="Hello"``, or when the single untagged entry has
          ``value="Hello"``.
        * List-to-list comparison works normally.
        """
        if isinstance(other, str):
            if len(self) == 1:
                return self[0].value == other
            untagged = self.untagged()
            if len(untagged) == 1:
                return untagged[0].value == other
            return False
        return super().__eq__(other)

    def __ne__(self, other: object) -> bool:
        """Return the negation of ``__eq__``.

        ``list`` implements ``!=`` itself, so without this override
        ``ls != "Hello"`` would bypass the ``str`` handling in ``__eq__`` and
        could be ``True`` at the same time as ``ls == "Hello"``.
        """
        outcome = self.__eq__(other)
        if outcome is NotImplemented:
            return outcome
        return not outcome

    # Lists are unhashable by default; keep that behaviour. Assigning ``None``
    # (rather than defining a method that raises) still makes ``hash()`` raise
    # ``TypeError`` and also lets ``collections.abc.Hashable`` checks report
    # the type as unhashable.
    __hash__ = None  # type: ignore[assignment]

    # -- query helpers ------------------------------------------------------

    def count_by_lang(self, lang: str | None = None) -> int:
        """Return the number of entries for a given language tag.

        Parameters
        ----------
        lang : str | None
            The language tag to count (e.g. ``"en"``). Use ``None`` or ``""``
            for untagged (plain) strings.

        Returns
        -------
        int
            Number of entries matching the language.
        """
        lang = self._norm_lang(lang)
        return sum(1 for ls in self if ls.lang == lang)

    def has_language(self, lang: str | None) -> bool:
        """Return ``True`` if at least one entry has the given language tag.

        Parameters
        ----------
        lang : str | None
            Language tag to check. Use ``None`` or ``""`` for untagged entries.
        """
        lang = self._norm_lang(lang)
        return any(ls.lang == lang for ls in self)

    def has_untagged(self) -> bool:
        """Return ``True`` if at least one entry has no language tag (``lang=None``)."""
        return self.has_language(None)

    def get_by_language(self, lang: str | None = None) -> LangStringList:
        """Return entries matching the given language tag.

        Parameters
        ----------
        lang : str | None
            Language tag to filter by. Use ``None`` or ``""`` for untagged
            entries.

        Returns
        -------
        LangStringList
            A new ``LangStringList`` containing only matching entries.
        """
        lang = self._norm_lang(lang)
        return LangStringList(ls for ls in self if ls.lang == lang)

    def has_synonyms(self, lang: str | None = None) -> bool:
        """Return ``True`` if the specified language has more than one entry.

        Parameters
        ----------
        lang : str | None
            Language tag to check. If ``None`` or ``""``, checks untagged
            entries.
        """
        return self.count_by_lang(lang) > 1

    def languages(self) -> set[str | None]:
        """Return the set of distinct language tags (including ``None`` for untagged)."""
        return {ls.lang for ls in self}
def _coerce_to_lang_string_list(
    value: LocalizedStrInput | list[LangString] | LangStringList,
) -> LangStringList:
    """Turn any accepted localized-string input into a ``LangStringList``.

    Parameters
    ----------
    value
        One of:

        * an existing ``LangStringList`` - its entries are reused as they are;
        * a ``list`` whose items are all ``LangString`` - reused as they are;
        * anything else (``str``, ``LangString``, ``dict`` keyed by language
          tag, or a list mixing those) - flattened by ``_normalise_into``.

    Returns
    -------
    LangStringList
        A new list in which each ``(value, lang)`` pair appears once, in
        first-seen order.
    """
    is_ready_made = isinstance(value, LangStringList) or (
        isinstance(value, list) and all(isinstance(entry, LangString) for entry in value)
    )
    if is_ready_made:
        # Already LangString objects: only deduplication is left to do.
        candidates = cast(list[LangString], value)
    else:
        candidates = []
        _normalise_into(value, candidates)
    return LangStringList(_deduplicate_lang_strings(candidates))


if TYPE_CHECKING:
    # Static checkers see the wide input union, so ``Model(field="plain")``
    # type-checks.
    LocalizedStr = LocalizedStrInput | LangStringList
else:
    # At runtime the BeforeValidator funnels every input through the coercion
    # function above before Pydantic validates the LangStringList.
    LocalizedStr = Annotated[LangStringList, BeforeValidator(_coerce_to_lang_string_list)]
@dataclass(frozen=True)
class RdfProperty:
    """Annotation metadata tying a Pydantic field to an RDF predicate.

    Place an instance inside ``Annotated[...]`` to state which predicate a
    field maps to and, optionally, how its values are typed, language-tagged
    or converted.

    Parameters
    ----------
    predicate : str | URIRef
        URI of the RDF predicate. The only required argument; typically a
        namespace member such as ``FOAF.name``.
    datatype : str | URIRef | None, optional
        Datatype URI for literal values (e.g. ``XSD.integer``).
        Default is None.
    language : str | None, optional
        Language tag for string literals (e.g. ``"en"``). Default is None.
    serializer : Callable | None, optional
        Hook applied to a Python value before it is written to RDF.
        Default is None.
    parser : Callable | None, optional
        Hook applied to an RDF node when it is read back into Python.
        Default is None.

    Examples
    --------
    Plain mapping::

        name: Annotated[Optional[List[str]], RdfProperty(FOAF.name)] = None

    Explicit datatype::

        age: Annotated[Optional[int], RdfProperty(
            FOAF.age,
            datatype=XSD.integer
        )] = None

    Language tag::

        description: Annotated[Optional[List[str]], RdfProperty(
            DCTERMS.description,
            language="en"
        )] = None

    Custom conversion hooks::

        def serialize_date(d: date) -> str:
            return d.isoformat()

        def parse_date(node) -> date:
            return date.fromisoformat(str(node))

        birth_date: Annotated[Optional[date], RdfProperty(
            SCHEMA.birthDate,
            serializer=serialize_date,
            parser=parse_date
        )] = None

    Notes
    -----
    - Instances are immutable (frozen dataclass).
    - ``language`` and ``datatype`` are intended to be mutually exclusive;
      this class itself does not validate the combination.

    See Also
    --------
    RdfBaseModel : Base class for RDF-enabled Pydantic models
    """

    predicate: str | URIRef
    datatype: str | URIRef | None = None
    language: str | None = None
    serializer: Any | None = None
    parser: Any | None = None

    def predicate_uri(self) -> URIRef:
        """Return :attr:`predicate` as an rdflib ``URIRef``.

        Returns
        -------
        URIRef
            The result of passing the predicate through ``_ensure_uri``.

        Examples
        --------
        >>> from rdflib import FOAF
        >>> prop = RdfProperty(FOAF.name)
        >>> prop.predicate_uri()
        rdflib.term.URIRef('http://xmlns.com/foaf/0.1/name')
        """
        uri = _ensure_uri(self.predicate)
        # The predicate is mandatory, so a missing URI is not expected here;
        # the assert also narrows the Optional for type checkers.
        assert uri is not None
        return uri

    def datatype_uri(self) -> URIRef | None:
        """Return :attr:`datatype` passed through ``_ensure_uri``.

        Returns
        -------
        URIRef | None
            The datatype URI, or None when no datatype is configured.

        Examples
        --------
        >>> from rdflib import XSD
        >>> prop = RdfProperty(FOAF.age, datatype=XSD.integer)
        >>> prop.datatype_uri()
        rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')
        """
        return _ensure_uri(self.datatype)
@runtime_checkable
class RdfUriGenerator(Protocol):
    """Structural type for callables that pick the RDF subject of a model.

    The protocol consists of ``__call__`` only, so plain functions, lambdas
    and instances of classes defining ``__call__`` all conform without
    inheriting from anything.

    Parameters
    ----------
    model : RdfBaseModel
        The model instance whose subject node is requested.
    base_uri : str | None, optional
        Keyword-only base URI hint.

    Returns
    -------
    URIRef | BNode
        The node to use as the subject of the resource.

    Examples
    --------
    A function::

        def my_generator(model: RdfBaseModel, *, base_uri: str | None = None) -> URIRef | BNode:
            return EX[type(model).__name__ + "/" + str(model.id)]

        person = Person(id="alice", rdf_uri_generator=my_generator)

    A configurable class::

        class PrefixedGenerator:
            def __init__(self, prefix: str) -> None:
                self.prefix = prefix

            def __call__(self, model: RdfBaseModel, *, base_uri: str | None = None) -> URIRef | BNode:
                return URIRef(self.prefix + str(model.id))
    """

    def __call__(
        self,
        model: RdfBaseModel,
        *,
        base_uri: str | None = None,
    ) -> URIRef | BNode:
        """Return the subject node for *model*."""
        ...
class DefaultUriGenerator:
    """Subject-URI strategy used when a model does not supply its own.

    Resolution order implemented by :meth:`__call__`:

    1. Read the attribute named by ``model.rdf_id_field`` (when that name is
       set). A non-``None``, non-empty value is stringified and becomes the
       identifier:

       * an identifier accepted by ``_looks_like_uri`` is used verbatim;
       * otherwise the model's namespace string, if any, is prepended;
       * otherwise ``base_uri``, passed through ``_normalise_base``, is
         prepended when given;
       * otherwise the bare identifier is used as the URI.

    2. Without an identifier and with ``auto_uuid`` enabled, a fresh UUID4 is
       minted: appended to the namespace string when there is one, else
       rendered as ``urn:uuid:<uuid4>``.

    3. Without an identifier and with ``auto_uuid`` disabled, a blank node is
       returned.

    Parameters
    ----------
    auto_uuid : bool
        Whether to mint a UUID-based URI when no identifier is available.
        Default is ``True``.

    Notes
    -----
    ``auto_uuid=True`` trades strict RDF anonymity for subjects that can be
    referenced from outside the graph they were created in. Pass
    ``auto_uuid=False`` when anonymous blank nodes are what you want.

    Examples
    --------
    Default behaviour::

        person = Person(rdf_uri_generator=DefaultUriGenerator())

    Blank nodes instead of UUIDs::

        person = Person(rdf_uri_generator=DefaultUriGenerator(auto_uuid=False))
    """

    def __init__(self, *, auto_uuid: bool = True) -> None:
        self.auto_uuid = auto_uuid

    def __call__(
        self,
        model: RdfBaseModel,
        *,
        base_uri: str | None = None,
    ) -> URIRef | BNode:
        """Generate the subject node for *model*."""
        id_field = model.rdf_id_field
        raw_id = getattr(model, id_field, None) if id_field else None
        identifier = str(raw_id) if raw_id is not None else None

        if identifier:
            if _looks_like_uri(identifier):
                return URIRef(identifier)
            # Namespace wins over base_uri; with neither, the prefix is empty
            # and the identifier itself becomes the URI.
            prefix = model._namespace_string()
            if not prefix:
                prefix = _normalise_base(base_uri) if base_uri else ""
            return URIRef(prefix + identifier)

        if not self.auto_uuid:
            return BNode()

        namespace = model._namespace_string()
        fresh_uuid = str(uuid.uuid4())
        if namespace:
            return URIRef(namespace + fresh_uuid)
        return URIRef(f"urn:uuid:{fresh_uuid}")
[docs] class RdfBaseModel(BaseModel): """Base class for Pydantic models with RDF serialization capabilities. This class extends Pydantic's BaseModel to provide automatic conversion to and from RDF graphs. Models inheriting from RdfBaseModel can be serialized to various RDF formats (Turtle, RDF/XML, JSON-LD, etc.) and deserialized back to Python objects. Class Attributes ---------------- rdf_type : str | URIRef | None The RDF type (rdf:type) for instances of this class. Typically set to a vocabulary class URI like `FOAF.Person` or `SKOS.Concept`. If None, no rdf:type triple is added to the graph. Note: `None` is valid only for base or abstract models; concrete vocabulary classes should explicitly define an `rdf_type`. rdf_namespace : str | Namespace | None The default namespace for generating subject URIs. Used when an instance has an `id` but not a full URI. For example, with namespace `FOAF` and id `"john"`, the subject becomes `<http://xmlns.com/foaf/0.1/john>`. rdf_id_field : str | None The name of the field to use for the RDF subject identifier. Defaults to `"id"`. Set to None to disable ID field mapping and always use UUIDs. rdf_prefixes : Dict[str, str | Namespace] Namespace prefix bindings for RDF serialization. Used to create readable output with prefixes like `foaf:name` instead of full URIs. Automatically includes 'rdf' and 'xsd' prefixes. Instance Attributes ------------------- id : Any, optional If `rdf_id_field` is "id" (default), this field contains the subject identifier. Can be a short string (combined with namespace) or a full URI. Methods ------- to_rdf_graph(graph=None, *, base_uri=None) -> Graph Serialize this model instance into an rdflib Graph. to_rdf(format="turtle", *, base_uri=None, **kwargs) -> str Serialize this model instance to an RDF string in the specified format. from_rdf_graph(graph, subject, *, base_uri=None) -> RdfBaseModel Class method to deserialize a model from an RDF graph. 
from_rdf(data, *, format="turtle", subject=None, base_uri=None) -> RdfBaseModel Class method to deserialize a model from an RDF string or bytes. Configuration ------------- The model_config allows arbitrary types (URIRef, Literal, etc.) in fields. Examples -------- Basic model definition:: from rdflib import Namespace, FOAF from typing import Annotated, Optional, List class Person(RdfBaseModel): rdf_type: str = str(FOAF.Person) rdf_namespace = FOAF rdf_prefixes = {"foaf": FOAF} name: Annotated[Optional[List[str]], RdfProperty(FOAF.name)] = None email: Annotated[Optional[List[str]], RdfProperty(FOAF.mbox)] = None Creating and serializing:: person = Person(name=["Alice Smith"], email=["alice@example.org"]) turtle_output = person.to_rdf("turtle") # Output includes proper @prefix declarations and triples Deserializing:: restored = Person.from_rdf(turtle_output, format="turtle") assert restored.name == ["Alice Smith"] With custom ID:: person = Person(id="alice", name=["Alice Smith"]) # Subject URI becomes: <http://xmlns.com/foaf/0.1/alice> With full URI as ID:: person = Person(id="http://example.org/people/alice", name=["Alice"]) # Subject URI is: <http://example.org/people/alice> Nested objects:: class Organization(RdfBaseModel): rdf_type: str = str(FOAF.Organization) name: Annotated[Optional[List[str]], RdfProperty(FOAF.name)] = None class Person(RdfBaseModel): rdf_type: str = str(FOAF.Person) name: Annotated[Optional[List[str]], RdfProperty(FOAF.name)] = None org: Annotated[Optional[List[Organization]], RdfProperty(FOAF.member)] = None person = Person( name=["Alice"], org=[Organization(name=["ACME Corp"])] ) # Both person and organization are serialized to the graph Notes ----- - All fields mapped to RDF should use `Annotated[..., RdfProperty(...)]` - Multi-valued properties use `Optional[List[T]]` (standard in RDF) - The `id` field is optional; if not provided, a UUID is generated - Nested RdfBaseModel instances are automatically serialized - Round-trip 
serialization is lossless for supported types - Custom serializers/parsers can handle complex types See Also -------- RdfProperty : Metadata for field-to-predicate mapping """ model_config = ConfigDict(arbitrary_types_allowed=True) rdf_type: ClassVar[str | URIRef | None] = None rdf_namespace: ClassVar[str | Namespace | None] = None rdf_id_field: ClassVar[str | None] = "id" rdf_prefixes: ClassVar[dict[str, str | Namespace]] = {} rdf_uri_generator: RdfUriGenerator = Field(default_factory=DefaultUriGenerator, exclude=True)
def to_rdf_graph(
    self,
    graph: Graph | None = None,
    *,
    base_uri: str | None = None,
    rdf_uri_generator: RdfUriGenerator | None = None,
) -> Graph:
    """Write this model instance into an rdflib Graph and return the graph.

    The triple generation itself is delegated to
    ``self._serialise_into_graph``; this method only decides which graph
    receives the triples.

    Parameters
    ----------
    graph : Graph | None, optional
        Graph to add to. When None (the default) a new, empty ``Graph`` is
        created.
    base_uri : str | None, optional
        Base URI hint, forwarded unchanged to the serialisation step.
        Default is None.
    rdf_uri_generator : RdfUriGenerator | None, optional
        Subject-URI generator, forwarded unchanged to the serialisation
        step. Default is None.

    Returns
    -------
    Graph
        The graph that was passed in, or the newly created one.

    Examples
    --------
    New graph::

        person = Person(name=["Alice"])
        graph = person.to_rdf_graph()

    Accumulating several instances in one graph::

        graph = Graph()
        Person(name=["Alice"]).to_rdf_graph(graph)
        Person(name=["Bob"]).to_rdf_graph(graph)

    With a base URI::

        person = Person(id="alice", name=["Alice"])
        graph = person.to_rdf_graph(base_uri="http://example.org/people/")

    See Also
    --------
    to_rdf : Serialize directly to a string format
    from_rdf_graph : Deserialize from a Graph
    """
    if graph is None:
        graph = Graph()
    self._serialise_into_graph(
        graph,
        base_uri=base_uri,
        rdf_uri_generator=rdf_uri_generator,
    )
    return graph
def to_rdf(
    self,
    format: str = "turtle",
    *,
    base_uri: str | None = None,
    rdf_uri_generator: RdfUriGenerator | None = None,
    **kwargs: Any,
) -> str:
    """Serialize the model instance to an RDF string.

    Convenience wrapper: builds a graph with :meth:`to_rdf_graph`, then
    calls the graph's ``serialize`` method.

    Parameters
    ----------
    format : str, optional
        Serialization format name handed to rdflib, for example
        ``"turtle"`` (default), ``"xml"``, ``"json-ld"``, ``"nt"`` or
        ``"n3"``.
    base_uri : str | None, optional
        Base URI hint, forwarded to :meth:`to_rdf_graph`. Default is None.
    rdf_uri_generator : RdfUriGenerator | None, optional
        Subject-URI generator, forwarded to :meth:`to_rdf_graph`.
        Default is None.
    **kwargs : Any
        Extra keyword arguments passed through to ``Graph.serialize``.

    Returns
    -------
    str
        Whatever ``Graph.serialize`` returns for the given arguments.

    Examples
    --------
    ::

        person = Person(name=["Alice Smith"])
        turtle = person.to_rdf("turtle")
        xml = person.to_rdf("xml")
        jsonld = person.to_rdf("json-ld")

    See Also
    --------
    to_rdf_graph : Get the Graph object directly
    from_rdf : Deserialize from an RDF string
    """
    populated = self.to_rdf_graph(
        base_uri=base_uri,
        rdf_uri_generator=rdf_uri_generator,
    )
    rendered = populated.serialize(format=format, **kwargs)
    return rendered  # type: ignore[no-any-return]
@classmethod
def from_rdf_graph(
    cls: type[T],
    graph: Graph,
    subject: URIRef | BNode | str,
    *,
    base_uri: str | None = None,
) -> T:
    """Deserialize a model instance from an RDF graph.

    For every field annotated with :class:`RdfProperty`, the graph is queried
    for objects of ``(subject, predicate, ?)`` and the results are converted
    to Python values before the model is constructed.

    Parameters
    ----------
    graph : Graph
        The rdflib Graph containing the RDF data.
    subject : URIRef | BNode | str
        The resource to deserialize. Strings are converted to ``URIRef``;
        ``BNode`` subjects are used as-is so that blank-node resources
        (for example nested objects) can be looked up in the graph.
    base_uri : str | None, optional
        Base URI used when deriving the identifier field from the subject,
        and forwarded to nested model deserialization. Default is None.

    Returns
    -------
    RdfBaseModel
        A new instance of the model class populated with data from the graph.

    Raises
    ------
    ValueError
        If ``subject`` is None.
    ValidationError
        If the extracted values don't pass Pydantic validation.

    Examples
    --------
    Basic deserialization::

        graph = Graph()
        graph.parse(data=turtle_data, format="turtle")
        person = Person.from_rdf_graph(
            graph, URIRef("http://example.org/people/alice")
        )

    With base URI::

        person = Person.from_rdf_graph(
            graph,
            URIRef("http://example.org/people/alice"),
            base_uri="http://example.org/people/",
        )

    Notes
    -----
    - Properties with no triples in the graph are omitted, so the field's
      own default applies.
    - List fields receive a list; a field that accepts both a scalar and a
      list receives the bare value when exactly one object is found.
    - Non-list fields receive the first object returned by the graph.
    - URIRef/BNode objects of fields typed as an RdfBaseModel subclass are
      deserialized recursively.

    See Also
    --------
    from_rdf : Deserialize from an RDF string
    to_rdf_graph : Serialize to a Graph
    """
    # Blank nodes must stay BNode instances: rdflib terms compare by type as
    # well as by value, so URIRef(str(bnode)) would match no triple at all.
    subject_node: URIRef | BNode | None
    if isinstance(subject, BNode):
        subject_node = subject
    else:
        subject_node = _ensure_uri(subject)
    if subject_node is None:
        msg = "Subject URI cannot be None"
        raise ValueError(msg)

    values: dict[str, Any] = {}
    for name, field in cls.model_fields.items():
        prop = _get_rdf_property(field)
        if prop is None:
            continue

        predicate = prop.predicate_uri()
        is_list, accepts_scalar, inner_type = _field_type_info(field)
        # Detect whether this field is a LocalizedStr (canonical list[LangString])
        is_localized = _is_localized_str_field(field)

        objects = list(graph.objects(subject_node, predicate))
        if not objects:
            continue

        if is_localized:
            # Produce list[LangString] directly – Pydantic's BeforeValidator
            # inside LocalizedStr will deduplicate.
            values[name] = [
                LangString(
                    value=str(obj),
                    lang=obj.language if isinstance(obj, Literal) else None,
                )
                for obj in objects
            ]
            continue

        model_type = _get_rdf_model_type(inner_type)
        items: list[Any]
        if model_type:
            items = [
                model_type.from_rdf_graph(graph, obj, base_uri=base_uri)
                if isinstance(obj, (URIRef, BNode))
                else _node_to_python(obj, inner_type, prop)
                for obj in objects
            ]
        else:
            items = [_node_to_python(obj, inner_type, prop) for obj in objects]

        # Keep the list unless the field is scalar-only, or it accepts both
        # forms and exactly one value was found in the graph.
        if is_list and not (accepts_scalar and len(items) == 1):
            values[name] = items
        else:
            values[name] = items[0]

    id_field = cls.rdf_id_field
    if id_field and id_field not in values:
        # The identifier helper works on the subject's string form, so a
        # blank node contributes its label exactly as before.
        identifier = cls._identifier_from_subject(URIRef(str(subject_node)), base_uri=base_uri)
        if identifier is not None:
            values[id_field] = identifier

    return cls(**values)
@classmethod
def from_rdf(
    cls: type[T],
    data: str | bytes,
    format: str = "turtle",
    *,
    subject: URIRef | BNode | str | None = None,
    base_uri: str | None = None,
) -> T:
    """Build a model instance from serialized RDF.

    The input is parsed into a fresh graph, the subject is resolved (either
    the one supplied or one inferred via ``_infer_subject``), and the work is
    handed to :meth:`from_rdf_graph`.

    Parameters
    ----------
    data : str | bytes
        The RDF document to parse.
    format : str, optional
        Parser format name passed to ``Graph.parse``, e.g. ``"turtle"``
        (default), ``"xml"``, ``"json-ld"``, ``"nt"`` or ``"n3"``. The
        format is not auto-detected.
    subject : URIRef | BNode | str | None, optional
        The resource to deserialize. When None, the subject is inferred
        from the graph; pass it explicitly when the document describes
        several resources. Default is None.
    base_uri : str | None, optional
        Base URI forwarded to :meth:`from_rdf_graph`. Default is None.

    Returns
    -------
    RdfBaseModel
        A new instance of the model class populated with the RDF data.

    Raises
    ------
    ValueError
        If no subject was given and none can be inferred, or if inference
        finds more than one candidate resource.
    ValidationError
        If the deserialized data doesn't pass Pydantic validation.

    Examples
    --------
    From a Turtle string::

        person = Person.from_rdf(turtle, format="turtle")

    With explicit subject::

        person = Person.from_rdf(
            turtle_data,
            format="turtle",
            subject="http://example.org/people/alice",
        )

    Round trip::

        original = Person(name=["Alice"])
        restored = Person.from_rdf(original.to_rdf("turtle"))
        assert restored.name == original.name

    See Also
    --------
    from_rdf_graph : Deserialize from a Graph object
    to_rdf : Serialize to an RDF string
    """
    parsed = Graph()
    parsed.parse(data=data, format=format)

    # Only fall back to inference when the caller did not name a subject.
    target = cls._infer_subject(parsed) if subject is None else subject
    if target is None:
        raise ValueError("Unable to determine subject for RDF document; provide the subject explicitly.")

    return cls.from_rdf_graph(parsed, target, base_uri=base_uri)
def _serialise_into_graph(
    self,
    graph: Graph,
    *,
    base_uri: str | None = None,
    rdf_uri_generator: RdfUriGenerator | None = None,
) -> URIRef | BNode:
    """Write this model's triples into *graph* and return its subject.

    Binds the model's prefixes, emits an ``rdf:type`` triple when the model
    declares one, then emits one triple per non-None value of every field
    carrying :class:`RdfProperty` metadata.

    Parameters
    ----------
    graph : Graph
        The rdflib Graph to add triples to.
    base_uri : str | None, optional
        Base URI for subject generation.
    rdf_uri_generator : RdfUriGenerator | None, optional
        A custom function to generate subject URIs for model instances.

    Returns
    -------
    URIRef | BNode
        The subject node of the serialized resource.
    """
    subject = self._subject_uri(base_uri=base_uri, rdf_uri_generator=rdf_uri_generator)
    self._bind_prefixes(graph)

    type_uri = _ensure_uri(self.rdf_type)
    if type_uri is not None:
        graph.add((subject, RDF.type, type_uri))

    for field_name, field in self.__class__.model_fields.items():
        prop = _get_rdf_property(field)
        if prop is None:
            continue
        current = getattr(self, field_name)
        if current is None:
            continue
        predicate = prop.predicate_uri()

        # LocalizedStr fields hold a LangStringList: one language-tagged
        # literal per entry, no further type inspection needed.
        if isinstance(current, LangStringList):
            for entry in current:
                tagged = Literal(entry.value, lang=entry.lang)
                graph.add((subject, predicate, tagged))
            continue

        is_list, _accepts_scalar, inner_type = _field_type_info(field)
        # A list-typed field may still hold a bare scalar; wrap it so both
        # shapes go through the same loop.
        members = current if (is_list and isinstance(current, list)) else [current]
        for member in members:
            if member is None:
                continue
            graph.add(
                (
                    subject,
                    predicate,
                    self._value_to_node(
                        member,
                        inner_type,
                        prop,
                        graph,
                        base_uri,
                        rdf_uri_generator=rdf_uri_generator,
                    ),
                )
            )

    return subject

@classmethod
def _identifier_from_subject(cls, subject: URIRef, *, base_uri: str | None = None) -> str | None:
    """Derive a short identifier from a subject URI.

    The model namespace is tried first, then the normalised *base_uri*; the
    first prefix the subject starts with is stripped.

    Parameters
    ----------
    subject : URIRef
        The subject URI to convert.
    base_uri : str | None, optional
        Base URI to strip from the subject.

    Returns
    -------
    str | None
        The subject with the matching prefix removed, or the full subject
        string when neither prefix matches.
    """
    text = str(subject)
    candidates: list[str | None] = [cls._namespace_string()]
    if base_uri:
        candidates.append(_normalise_base(base_uri))
    for prefix in candidates:
        if prefix and text.startswith(prefix):
            return text[len(prefix) :]
    return text

@classmethod
def _namespace_string(cls) -> str | None:
    """Return the model's ``rdf_namespace`` as a string.

    Returns
    -------
    str | None
        ``str(rdf_namespace)``, or None if no namespace is set.
    """
    configured = cls.rdf_namespace
    return None if configured is None else str(configured)

def _subject_uri(
    self,
    *,
    base_uri: str | None = None,
    rdf_uri_generator: RdfUriGenerator | None = None,
) -> URIRef | BNode:
    """Produce the subject node for this instance.

    The work is delegated to a generator callable: the *rdf_uri_generator*
    argument when given, otherwise ``self.rdf_uri_generator``.

    Parameters
    ----------
    base_uri : str | None, optional
        Base URI forwarded to the generator.
    rdf_uri_generator : RdfUriGenerator | None, optional
        Per-call override of the instance's generator.

    Returns
    -------
    URIRef | BNode
        Whatever the selected generator returns for this instance.
    """
    chosen = self.rdf_uri_generator if rdf_uri_generator is None else rdf_uri_generator
    return chosen(self, base_uri=base_uri)

def _bind_prefixes(self, graph: Graph) -> None:
    """Register the default and model-specific prefixes on *graph*.

    Model entries from ``rdf_prefixes`` override defaults of the same name.

    Parameters
    ----------
    graph : Graph
        The graph to bind prefixes to.
    """
    own = {key: str(value) for key, value in self.rdf_prefixes.items()}
    combined = {**_default_prefixes(), **own}
    for prefix, namespace in combined.items():
        graph.bind(prefix, namespace)

def _value_to_node(
    self,
    value: Any,
    expected_type: Any,
    prop: RdfProperty,
    graph: Graph,
    base_uri: str | None,
    *,
    rdf_uri_generator: RdfUriGenerator | None = None,
) -> URIRef | BNode | Literal:
    """Turn one Python value into an RDF node.

    The property's serializer (if any) runs first. Nested models are written
    into *graph* and represented by their subject; RDF terms pass through;
    everything else becomes a ``Literal``.

    Parameters
    ----------
    value : Any
        The Python value to convert.
    expected_type : Any
        The element type taken from the field annotation.
    prop : RdfProperty
        The RDF property metadata (serializer, datatype, language).
    graph : Graph
        The graph that receives triples of nested models.
    base_uri : str | None
        Base URI forwarded to nested model serialization.
    rdf_uri_generator : RdfUriGenerator | None, optional
        Subject generator forwarded to nested model serialization.

    Returns
    -------
    URIRef | BNode | Literal
        The RDF node representation of the value.
    """
    if prop.serializer is not None:
        value = prop.serializer(value)

    if isinstance(value, RdfBaseModel):
        return value._serialise_into_graph(graph, base_uri=base_uri, rdf_uri_generator=rdf_uri_generator)

    # Already an RDF term: nothing to convert.
    if isinstance(value, (URIRef, Literal)):
        return value

    if isinstance(value, Enum):
        value = value.value

    if isinstance(value, bytes):
        import base64

        return Literal(base64.b64encode(value).decode("ascii"), datatype=XSD.base64Binary)

    if isinstance(value, LangString):
        return Literal(value.value, lang=value.lang)

    if isinstance(value, (datetime, date, time, int, float, bool, Decimal, uuid.UUID)):
        # An explicit datatype on the property wins over the inferred one.
        explicit = prop.datatype_uri()
        resolved = _python_datatype(value) if explicit is None else explicit
        return Literal(value, datatype=resolved)

    if isinstance(value, str):
        explicit = prop.datatype_uri()
        if prop.language:
            return Literal(value, lang=prop.language)
        if explicit is not None:
            return Literal(value, datatype=explicit)

        # A plain string is only promoted to a URIRef when the annotation
        # allows URIRef and the text starts with a URI scheme.
        origin = get_origin(expected_type)
        union_like = origin is Union or (hasattr(types, "UnionType") and origin is types.UnionType)
        permitted = get_args(expected_type) if union_like else (expected_type,)
        if URIRef in permitted and _looks_like_uri(value):
            return URIRef(value)

    return Literal(value)

@classmethod
def _infer_subject(cls, graph: Graph) -> URIRef | BNode | None:
    """Work out which subject in *graph* represents this model.

    When the model declares an ``rdf_type``, only subjects having that
    ``rdf:type`` are considered; otherwise every subject in the graph is.

    Parameters
    ----------
    graph : Graph
        The graph to analyze.

    Returns
    -------
    URIRef | BNode | None
        The single candidate subject, or None if there is none.

    Raises
    ------
    ValueError
        If more than one candidate subject is found.
    """
    type_uri = _ensure_uri(cls.rdf_type)
    if type_uri is None:
        candidates = _unique(graph.subjects())
        ambiguity = "Multiple resources found in graph; provide the subject explicitly."
    else:
        candidates = _unique(graph.subjects(RDF.type, type_uri))
        ambiguity = "Multiple resources of the requested rdf:type were found; provide the subject explicitly."

    if not candidates:
        return None
    if len(candidates) > 1:
        raise ValueError(ambiguity)
    return cast(URIRef | BNode, candidates[0])
def _get_rdf_property(field: Any) -> RdfProperty | None:
    """Extract RdfProperty metadata from a field's metadata or annotation.

    Parameters
    ----------
    field : Any
        A Pydantic field information object.

    Returns
    -------
    RdfProperty | None
        The first RdfProperty found in ``field.metadata`` or, failing that,
        in the ``Annotated`` metadata of ``field.annotation``; otherwise None.
    """
    metadata = getattr(field, "metadata", ()) or ()
    for item in metadata:
        if isinstance(item, RdfProperty):
            return item
    annotation = getattr(field, "annotation", None)
    if annotation is not None:
        for item in _annotation_metadata(annotation):
            if isinstance(item, RdfProperty):
                return item
    return None


def _field_type_info(field: Any) -> tuple[bool, bool, Any]:
    """Determine if a field is a list type and extract its inner type.

    Also handles Optional types by unwrapping Union[T, None].

    Parameters
    ----------
    field : Any
        A Pydantic field information object.

    Returns
    -------
    tuple[bool, bool, Any]
        A tuple of (is_list, accepts_scalar, inner_type).

        - is_list is True if the field accepts list values.
        - accepts_scalar is True if the field also accepts a single
          (non-list) value, e.g. ``str | list[str] | None``.
        - inner_type is the type of individual elements (``Any`` for an
          unparameterized list).
    """
    annotation = getattr(field, "annotation", Any)
    annotation = _unwrap_annotation(annotation)
    origin = get_origin(annotation)

    # Handle both Union[T, None], T | None syntax and Union with list
    if origin is Union or origin is types.UnionType:
        args = get_args(annotation)

        # Check if any arg is a list to support Union[str, list[str]]
        has_list = False
        list_item_type: Any = Any
        non_none_non_list_args: list[Any] = []
        for arg in args:
            arg_unwrapped = _unwrap_annotation(arg)
            if get_origin(arg_unwrapped) is list:
                has_list = True
                list_args = get_args(arg_unwrapped)
                list_item_type = _unwrap_annotation(list_args[0]) if list_args else Any
            elif arg is not type(None):
                non_none_non_list_args.append(arg)

        if has_list:
            # The field has a list variant. If it also has non-None scalar
            # type args, it accepts scalars too (e.g. str | list[str] | None).
            accepts_scalar = len(non_none_non_list_args) > 0
            return True, accepts_scalar, list_item_type

        # Existing logic for unwrapping Optional[T]
        non_none_args = [arg for arg in args if arg is not type(None)]
        if len(non_none_args) == 1:
            annotation = _unwrap_annotation(non_none_args[0])
            origin = get_origin(annotation)

    if origin is list:
        # A bare ``typing.List`` has origin ``list`` but no type arguments;
        # treat its items as Any instead of indexing an empty tuple.
        item_args = get_args(annotation)
        item_type = _unwrap_annotation(item_args[0]) if item_args else Any
        return True, False, item_type

    # LangStringList is a concrete list subclass rather than a parameterized
    # generic, so it is recognised by class identity, not by get_origin.
    if isinstance(annotation, type) and issubclass(annotation, LangStringList):
        return True, False, LangString

    return False, False, annotation


def _unwrap_annotation(annotation: Any) -> Any:
    """Unwrap Annotated type to get the actual type.

    Repeatedly strips ``Annotated[...]`` layers until a non-Annotated type
    is reached.

    Parameters
    ----------
    annotation : Any
        A potentially Annotated type hint.

    Returns
    -------
    Any
        The unwrapped type, or the original if not Annotated.
    """
    while get_origin(annotation) is Annotated:
        annotation = get_args(annotation)[0]
    return annotation


def _annotation_metadata(annotation: Any) -> tuple[Any, ...]:
    """Extract metadata from an Annotated type.

    Parameters
    ----------
    annotation : Any
        A type annotation, possibly Annotated.

    Returns
    -------
    tuple[Any, ...]
        The metadata items if Annotated, otherwise empty tuple.
    """
    if get_origin(annotation) is Annotated:
        args = get_args(annotation)
        return tuple(args[1:])
    return ()


def _node_to_python(node: Any, expected_type: Any, prop: RdfProperty) -> Any:
    """Convert an RDF node to a Python value.

    A custom ``prop.parser`` takes precedence. Otherwise the node is turned
    into a Python value (``Literal.toPython()`` for literals, ``str`` for
    other nodes) and coerced towards *expected_type*. When a coercion fails
    the uncoerced value is returned so that Pydantic validation can decide.

    Parameters
    ----------
    node : Any
        The RDF node to convert (URIRef or Literal).
    expected_type : Any
        The expected Python type from field annotations.
    prop : RdfProperty
        The RDF property metadata.

    Returns
    -------
    Any
        The converted Python value.

    Raises
    ------
    TypeError
        If a nested RDF model is encountered (should be handled separately).
    ValueError
        If *expected_type* is an Enum and the value is not one of its members.
    """
    if prop.parser is not None:
        return prop.parser(node)

    if expected_type is LangString or (isinstance(expected_type, type) and issubclass(expected_type, LangString)):
        if isinstance(node, Literal):
            return LangString(value=str(node), lang=node.language)
        return LangString(value=str(node))

    if _is_rdf_model(expected_type):
        raise TypeError("Nested RDF models should be handled separately.")

    if expected_type is URIRef:
        if isinstance(node, URIRef):
            return node
        return URIRef(str(node))

    if isinstance(node, Literal):
        value = node.toPython()
    else:
        value = str(node)

    if expected_type is Any or expected_type is None:
        return value
    if expected_type is str:
        return str(value)
    if expected_type is bool:
        if isinstance(value, bool):
            return value
        if isinstance(value, str):
            # bool("false") is True, so untyped literals must be mapped by
            # their lexical form ("true"/"false"/"1"/"0") instead.
            lexical = value.strip().lower()
            if lexical in ("true", "1"):
                return True
            if lexical in ("false", "0"):
                return False
            # Unrecognised text: hand it to Pydantic rather than guess.
            return value
        try:
            return bool(value)
        except (TypeError, ValueError):
            return value
    if expected_type is int or expected_type is float:
        try:
            return expected_type(value)
        except (TypeError, ValueError):
            return value
    if expected_type is datetime:
        if isinstance(value, datetime):
            return value
        try:
            return datetime.fromisoformat(str(value))
        except ValueError:
            return value
    if expected_type is date:
        if isinstance(value, date):
            return value
        try:
            return date.fromisoformat(str(value))
        except ValueError:
            return value
    if expected_type is time:
        if isinstance(value, time):
            return value
        try:
            return time.fromisoformat(str(value))
        except ValueError:
            return value
    if expected_type is Decimal:
        try:
            return Decimal(value)
        except (ValueError, TypeError, ArithmeticError):
            return value
    if expected_type is bytes:
        # No conversion is attempted here; the value produced by
        # Literal.toPython() is passed through unchanged.
        return value
    if expected_type is uuid.UUID:
        if isinstance(value, uuid.UUID):
            return value
        try:
            return uuid.UUID(str(value))
        except (ValueError, TypeError):
            return value
    if isinstance(expected_type, type) and issubclass(expected_type, Enum):
        return expected_type(value)
    return value


def _python_datatype(value: Any) -> URIRef | None:
    """Infer XSD datatype URI from a Python value.

    ``bool`` is tested before ``int`` and ``datetime`` before ``date``
    because each is a subclass of the latter.

    Parameters
    ----------
    value : Any
        A Python value to determine the datatype for.

    Returns
    -------
    URIRef | None
        The XSD datatype URI, or None if no mapping exists.
    """
    if isinstance(value, bool):
        return XSD.boolean
    if isinstance(value, int):
        return XSD.integer
    if isinstance(value, float):
        return XSD.double
    if isinstance(value, datetime):
        return XSD.dateTime
    if isinstance(value, date):
        return XSD.date
    if isinstance(value, time):
        return XSD.time
    if isinstance(value, Decimal):
        return XSD.decimal
    if isinstance(value, bytes):
        return XSD.base64Binary
    if isinstance(value, uuid.UUID):
        return XSD.string
    return None


def _ensure_uri(value: str | URIRef | Namespace | None) -> URIRef | None:
    """Convert various types to a URIRef.

    Parameters
    ----------
    value : str | URIRef | Namespace | None
        A value that might represent a URI.

    Returns
    -------
    URIRef | None
        The value itself if it already is a URIRef, ``URIRef(str(value))``
        for anything else, or None if the value is None.
    """
    if value is None:
        return None
    if isinstance(value, URIRef):
        return value
    return URIRef(str(value))


URI_PATTERN = re.compile(r"^[a-zA-Z][a-zA-Z0-9+.-]*:")


def _looks_like_uri(value: str) -> bool:
    """Check if a string looks like a URI using a URI scheme pattern.

    Parameters
    ----------
    value : str
        A string to check.

    Returns
    -------
    bool
        True if the string starts with a URI scheme (e.g., 'http:', 'urn:').
    """
    return bool(URI_PATTERN.match(value))


def _normalise_base(base_uri: str) -> str:
    """Normalize a base URI to ensure it ends with '/' or '#'.

    Parameters
    ----------
    base_uri : str
        A base URI string.

    Returns
    -------
    str
        The base URI unchanged if it already ends with '/' or '#',
        otherwise with '/' appended.
    """
    if base_uri.endswith(("/", "#")):
        return base_uri
    return base_uri + "/"


def _unique(values: Iterable[Any]) -> list[Any]:
    """Return unique items from an iterable, preserving order.

    Parameters
    ----------
    values : Iterable[Any]
        An iterable of hashable items.

    Returns
    -------
    list[Any]
        A list with duplicates removed, in original order.
    """
    seen = set()
    result = []
    for value in values:
        if value not in seen:
            seen.add(value)
            result.append(value)
    return result


def _default_prefixes() -> dict[str, str]:
    """Get the default namespace prefixes for RDF serialization.

    Returns
    -------
    dict[str, str]
        A new dictionary mapping prefix strings to namespace URI strings.
        Includes rdf and xsd by default.
    """
    return {"rdf": str(RDF), "xsd": str(XSD)}


def _is_localized_str_field(field: Any) -> bool:
    """Check whether a field's annotation resolves to ``LocalizedStr``.

    ``LocalizedStr`` is ``Annotated[list[LangString], BeforeValidator(...)]``.
    After Pydantic unwrapping we look for ``list[LangString]`` anywhere in the
    annotation tree (including ``Union[..., None]`` wrappers).
    """
    annotation = getattr(field, "annotation", None)
    if annotation is None:
        return False
    return _annotation_contains_lang_list(annotation)


def _annotation_contains_lang_list(annotation: Any) -> bool:
    """Return *True* if *annotation* is or contains ``LangStringList`` or ``list[LangString]``."""
    # Unwrap Annotated
    unwrapped = _unwrap_annotation(annotation)

    # Direct match: LangStringList (concrete class, not generic)
    if unwrapped is LangStringList:
        return True

    origin = get_origin(unwrapped)

    # Direct match: list[LangString]
    if origin is list:
        args = get_args(unwrapped)
        if args and (args[0] is LangString or _unwrap_annotation(args[0]) is LangString):
            return True
        return False

    # Union: recurse into each branch
    if origin is Union or origin is types.UnionType:
        for arg in get_args(unwrapped):
            if arg is type(None):
                continue
            if _annotation_contains_lang_list(arg):
                return True

    return False


def _is_rdf_model(value: Any) -> bool:
    """Check if a value is an RdfBaseModel subclass.

    Parameters
    ----------
    value : Any
        A value to check (typically a type).

    Returns
    -------
    bool
        True if value is a class and a subclass of RdfBaseModel.
    """
    return isinstance(value, type) and issubclass(value, RdfBaseModel)


def _get_rdf_model_type(type_hint: Any) -> type[RdfBaseModel] | None:
    """Get the RdfBaseModel type from a type hint (possibly a Union).

    Parameters
    ----------
    type_hint : Any
        The type hint to check.

    Returns
    -------
    Type[RdfBaseModel] | None
        The hint itself if it is an RdfBaseModel subclass, the first such
        member if the hint is a Union, otherwise None.
    """
    if _is_rdf_model(type_hint):
        return type_hint  # type: ignore[no-any-return]

    origin = get_origin(type_hint)
    if origin is Union or (hasattr(types, "UnionType") and origin is types.UnionType):
        for arg in get_args(type_hint):
            if _is_rdf_model(arg):
                return arg  # type: ignore[no-any-return]
    return None


__all__ = ["RdfBaseModel", "RdfProperty", "LangString", "LangStringList", "LocalizedStr"]

# Ensure defaults are preserved when using lightweight pydantic substitutes.
RdfBaseModel.rdf_id_field = "id"