"""Pydantic RDF Base Model - Bridge between Pydantic models and RDF graphs.
This module provides a base class and utilities for seamlessly converting Pydantic models
to and from RDF (Resource Description Framework) graphs using rdflib. It enables type-safe,
validated RDF data modeling with automatic serialization and deserialization.
The core components are:
- :class:`RdfBaseModel`: A Pydantic BaseModel subclass that provides RDF serialization
and deserialization capabilities. Models inheriting from this class can be automatically
converted to/from RDF graphs in various formats (Turtle, RDF/XML, JSON-LD, etc.).
- :class:`RdfProperty`: A metadata descriptor used in type annotations to map Pydantic
fields to RDF predicates, with optional datatype and language specifications.
Basic Usage
-----------
Define a model by inheriting from RdfBaseModel and annotating fields with RdfProperty::
from typing import Annotated, Optional, List
from rdflib import Namespace, URIRef
from dartfx.rdf.pydantic import RdfBaseModel, RdfProperty
FOAF = Namespace("http://xmlns.com/foaf/0.1/")
class Person(RdfBaseModel):
rdf_type: str = str(FOAF.Person)
rdf_namespace = FOAF
rdf_prefixes = {"foaf": FOAF}
name: Annotated[Optional[List[str]], RdfProperty(FOAF.name)] = None
email: Annotated[Optional[List[str]], RdfProperty(FOAF.mbox)] = None
knows: Annotated[Optional[List[URIRef | Person]], RdfProperty(FOAF.knows)] = None
Serialize to RDF::
person = Person(name=["Alice"], email=["alice@example.org"])
turtle = person.to_rdf("turtle")
# Output: Turtle format RDF with proper namespace bindings
Deserialize from RDF::
restored = Person.from_rdf(turtle, format="turtle")
assert restored.name == ["Alice"]
Key Features
------------
- **Type Safety**: Full Pydantic validation for RDF data
- **Multiple Formats**: Serialize to Turtle, RDF/XML, JSON-LD, N-Triples, etc.
- **Round-trip Support**: Lossless conversion between Python objects and RDF
- **Nested Objects**: Support for nested RdfBaseModel instances
- **List Values**: Automatic handling of multi-valued properties
- **Custom Datatypes**: Specify XSD datatypes and language tags
- **Namespace Management**: Automatic prefix binding for clean serialization
- **Flexible Identifiers**: Use custom ID fields or auto-generate UUIDs
Advanced Features
-----------------
Custom serializers and parsers::
def serialize_date(d: date) -> str:
return d.isoformat()
def parse_date(node: Literal) -> date:
return date.fromisoformat(str(node))
birth_date: Annotated[Optional[date], RdfProperty(
SCHEMA.birthDate,
serializer=serialize_date,
parser=parse_date
)] = None
Language-tagged literals::
description: Annotated[Optional[List[str]], RdfProperty(
DC.description,
language="en"
)] = None
Custom datatypes::
age: Annotated[Optional[int], RdfProperty(
FOAF.age,
datatype=XSD.integer
)] = None
Examples
--------
Simple metadata example::
from rdflib import Namespace, DCTERMS
class Document(RdfBaseModel):
rdf_namespace = DCTERMS
rdf_prefixes = {"dcterms": DCTERMS}
title: Annotated[Optional[List[str]], RdfProperty(DCTERMS.title)] = None
creator: Annotated[Optional[List[str]], RdfProperty(DCTERMS.creator)] = None
doc = Document(title=["My Document"], creator=["John Doe"])
print(doc.to_rdf("turtle"))
Nested objects example::
class Organization(RdfBaseModel):
rdf_type: str = str(FOAF.Organization)
name: Annotated[Optional[List[str]], RdfProperty(FOAF.name)] = None
class Person(RdfBaseModel):
rdf_type: str = str(FOAF.Person)
name: Annotated[Optional[List[str]], RdfProperty(FOAF.name)] = None
works_for: Annotated[Optional[List[Organization]], RdfProperty(FOAF.workplaceHomepage)] = None
org = Organization(name=["ACME Corp"])
person = Person(name=["Alice"], works_for=[org])
# Both person and organization are serialized to the graph
Notes
-----
- Field names don't need to match RDF predicate names - use RdfProperty to map them
- Use `Optional[List[T]]` for multi-valued properties (standard in RDF)
- The `id` field is special and maps to the RDF subject URI
- Custom `rdf_id_field` can be specified per model
- Auto-generated UUIDs are used when no ID is provided
- Namespace prefixes improve readability of serialized output
See Also
--------
- rdflib documentation: https://rdflib.readthedocs.io/
- Pydantic documentation: https://docs.pydantic.dev/
- RDF Primer: https://www.w3.org/TR/rdf11-primer/
"""
from __future__ import annotations
import re
import types
import uuid
from collections.abc import Iterable
from dataclasses import dataclass
from datetime import date, datetime, time
from decimal import Decimal
from enum import Enum
from typing import (
TYPE_CHECKING,
Annotated,
Any,
ClassVar,
Protocol,
TypeVar,
Union,
cast,
get_args,
get_origin,
runtime_checkable,
)
from pydantic import BaseModel, BeforeValidator, ConfigDict, Field
from rdflib import RDF, XSD, BNode, Graph, Literal, Namespace, URIRef
# Type variable bound to ``RdfBaseModel``. Used as ``cls: type[T]`` / ``-> T`` on
# classmethod constructors (e.g. ``from_rdf_graph``) so they are typed as
# returning the subclass they were called on.
T = TypeVar("T", bound="RdfBaseModel")
[docs]
class LangString(BaseModel):
    """Text value paired with an optional language tag.

    Models an RDF language-tagged literal as a small Pydantic model, so
    localized strings go through the usual Pydantic validation.

    Attributes
    ----------
    value : str
        The text of the literal.
    lang : str | None, optional
        Language tag such as ``"en"``, ``"fr"`` or ``"de"``. ``None`` (the
        default) means the string carries no tag.

    Notes
    -----
    Equality with another ``LangString`` and hashing are both based on the
    ``(value, lang)`` pair. Comparison with any other object is delegated to
    ``BaseModel.__eq__``.
    """

    value: str
    lang: str | None = None

    def __str__(self) -> str:
        """Return only the text, without the language tag."""
        return self.value

    def __repr__(self) -> str:
        """Return ``"text"@lang``, or just ``"text"`` when no tag is set."""
        quoted = f'"{self.value}"'
        return f"{quoted}@{self.lang}" if self.lang else quoted

    def __eq__(self, other: Any) -> bool:
        """Compare by ``(value, lang)`` against another ``LangString``."""
        if not isinstance(other, LangString):
            return super().__eq__(other)
        return (self.value, self.lang) == (other.value, other.lang)

    def __hash__(self) -> int:
        """Hash the same ``(value, lang)`` pair that ``__eq__`` compares."""
        return hash((self.value, self.lang))
# ---------------------------------------------------------------------------
# Input types accepted by LocalizedStr coercion
# ---------------------------------------------------------------------------
# Each of these shapes is flattened into ``LangString`` entries by
# ``_normalise_into``:
#   * ``str``                         -> one untagged entry (``lang=None``)
#   * ``LangString``                  -> used as-is
#   * ``dict[str, str | list[str]]``  -> key is the language tag ("" means
#                                        untagged); each value yields one entry
#   * ``list`` mixing any of the above -> flattened recursively
LocalizedStrInput = (
    str | LangString | list["str | LangString | dict[str, str | list[str]]"] | dict[str, str | list[str]]
)
def _is_flattenable(obj: Any) -> bool:
    """Return ``True`` if *obj* is a collection whose items should be visited.

    Strings and bytes are iterable but represent a single value, mappings have
    their own handling, and Pydantic models (``LangString`` included) define
    ``__iter__`` over their fields, so all of those are excluded.
    """
    return isinstance(obj, Iterable) and not isinstance(obj, (str, bytes, bytearray, dict, BaseModel))


def _normalise_into(
    value: Any,
    acc: list[LangString],
) -> None:
    """Recursively normalise *value* and append ``LangString`` items to *acc*.

    Parameters
    ----------
    value : Any
        One of the accepted input shapes:

        * ``LangString`` - appended unchanged.
        * ``str`` - appended as an untagged entry (``lang=None``).
        * ``dict`` - each key is a language tag (a falsy key such as ``""``
          becomes ``None``); each value is either a single value or a
          collection of values, all stringified with ``str()``.
        * any other non-scalar iterable (``list``, ``tuple``, ``set``,
          generator, dict view, ...) - every item is normalised recursively.
        * anything else - stringified with ``str()`` as an untagged entry.
    acc : list[LangString]
        Output accumulator, mutated in place. No deduplication happens here.

    Notes
    -----
    Only ``list`` used to be flattened, so a tuple or generator handed to
    ``LangStringList.extend`` ended up as a single entry holding the
    container's ``str()`` form. All non-scalar iterables are now treated like
    lists.
    """
    if isinstance(value, LangString):
        acc.append(value)
    elif isinstance(value, str):
        acc.append(LangString(value=value, lang=None))
    elif isinstance(value, dict):
        for lang_key, val in value.items():
            lang = lang_key or None  # "" → None
            if _is_flattenable(val):
                acc.extend(LangString(value=str(v), lang=lang) for v in val)
            else:
                acc.append(LangString(value=str(val), lang=lang))
    elif _is_flattenable(value):
        for item in value:
            _normalise_into(item, acc)
    else:
        # Last resort: keep the value, but only as its string form.
        acc.append(LangString(value=str(value), lang=None))
def _deduplicate_lang_strings(items: list[LangString]) -> list[LangString]:
"""Remove duplicate ``(value, lang)`` pairs, preserving order."""
seen: set[tuple[str, str | None]] = set()
result: list[LangString] = []
for ls in items:
key = (ls.value, ls.lang)
if key not in seen:
seen.add(key)
result.append(ls)
return result
[docs]
class LangStringList(list[LangString]):
    """A ``list[LangString]`` subclass with convenience query methods.

    Every mutation (``append``, ``extend``, ``+=``, ``insert``) coerces
    flexible input (``str``, ``dict``, ``LangString``, or a list of those)
    into ``LangString`` objects and silently skips ``(value, lang)`` pairs
    that are already present.

    The constructor is inherited from ``list`` unchanged: it neither coerces
    nor deduplicates, so hand it ``LangString`` objects.

    A list can be compared with a plain ``str`` when it is unambiguous (see
    ``__eq__``); ``!=`` always gives the opposite answer of ``==``. Instances
    are unhashable, like ordinary lists.

    Examples
    --------
    ::

        from dartfx.rdf.pydantic import LangString, LangStringList

        ls = LangStringList([
            LangString(value="World", lang="en"),
            LangString(value="Mundo", lang="es"),
        ])
        ls += LangString(value="Welt", lang="de")

        ls.has_language("en")     # True
        ls.count_by_lang("en")    # 1
        ls.languages()            # {"en", "es", "de"}
        ls.has_synonyms("en")     # False
    """

    # -- internal helpers ---------------------------------------------------

    @staticmethod
    def _norm_lang(lang: str | None) -> str | None:
        """Normalise ``""`` to ``None`` for language tags."""
        return None if lang == "" else lang

    @staticmethod
    def _coerce(value: Any) -> list[LangString]:
        """Flatten any accepted input shape into a list of ``LangString``."""
        if isinstance(value, LangString):
            return [value]
        coerced: list[LangString] = []
        _normalise_into(value, coerced)
        return coerced

    def _keys(self) -> set[tuple[str, str | None]]:
        """Return the set of ``(value, lang)`` pairs currently stored."""
        return {(ls.value, ls.lang) for ls in self}

    def _fresh_entries(self, value: Any) -> list[LangString]:
        """Coerce *value* and keep only entries that are not stored yet.

        The key set is built once per call rather than once per item, so bulk
        additions stay linear. Duplicates inside *value* itself are dropped
        too; order is preserved.
        """
        seen = self._keys()
        fresh: list[LangString] = []
        for ls in self._coerce(value):
            key = (ls.value, ls.lang)
            if key not in seen:
                seen.add(key)
                fresh.append(ls)
        return fresh

    def _add_if_new(self, item: LangString) -> None:
        """Append *item* unless its ``(value, lang)`` pair already exists."""
        if (item.value, item.lang) not in self._keys():
            super().append(item)

    def untagged(self) -> LangStringList:
        """Return entries whose language tag is ``None``."""
        return LangStringList(ls for ls in self if ls.lang is None)

    # -- list overrides (uniqueness-preserving) -----------------------------

    def append(self, item: Any) -> None:
        """Append *item*, coercing it and skipping existing ``(value, lang)`` pairs.

        A ``LangString`` is added as-is. Other accepted shapes (``str``,
        ``dict``, ...) are coerced first and may therefore add several entries.
        """
        super().extend(self._fresh_entries(item))

    def extend(self, items: Any) -> None:
        """Extend with *items*, coercing flexible inputs and deduplicating."""
        super().extend(self._fresh_entries(items))

    def insert(self, index: int, item: Any) -> None:  # type: ignore[override]
        """Insert *item* at *index* if ``(value, lang)`` is not already present.

        *item* is coerced like in ``append``. If coercion yields several new
        entries they are inserted together, in order, starting at *index*.
        """
        # ``s[i:i] = [x]`` is the documented equivalent of ``s.insert(i, x)``
        # and also handles zero or several new entries.
        super().__setitem__(slice(index, index), self._fresh_entries(item))

    def __iadd__(self, other: Any) -> LangStringList:  # type: ignore[override]
        """Support ``ls += LangString(...)`` and ``ls += [...]``."""
        super().extend(self._fresh_entries(other))
        return self

    def __add__(self, other: Any) -> LangStringList:  # type: ignore[override]
        """Return a new ``LangStringList`` with additional entries."""
        result = LangStringList(self)
        result += other
        return result

    # -- subtraction (removal) ----------------------------------------------

    def __isub__(self, other: Any) -> LangStringList:
        """Support ``ls -= LangString(...)`` and ``ls -= [...]``.

        Removes every entry whose ``(value, lang)`` pair matches one of the
        (coerced) entries in *other*.
        """
        keys_to_remove = {(ls.value, ls.lang) for ls in self._coerce(other)}
        kept = [ls for ls in self if (ls.value, ls.lang) not in keys_to_remove]
        # Replace the contents in place so existing references see the change.
        self.clear()
        super().extend(kept)
        return self

    def __sub__(self, other: Any) -> LangStringList:
        """Return a new ``LangStringList`` with matching entries removed."""
        result = LangStringList(self)
        result -= other
        return result

    # -- str-like behaviour -------------------------------------------------

    def __str__(self) -> str:
        """Return the plain string value when unambiguous.

        * If there is exactly **one** entry → its value.
        * If there are multiple entries but exactly **one** untagged
          (``lang=None``) entry → that entry's value.
        * Otherwise → the default list representation.
        """
        if len(self) == 1:
            return self[0].value
        untagged = self.untagged()
        if len(untagged) == 1:
            return untagged[0].value
        return super().__repr__()

    def __eq__(self, other: object) -> bool:
        """Allow comparison with ``str`` when str-like behaviour applies.

        * ``pref_label == "Hello"`` is ``True`` when there is exactly one
          entry with ``value="Hello"``, or when the single untagged entry
          has ``value="Hello"``.
        * List-to-list comparison works normally.
        """
        if isinstance(other, str):
            if len(self) == 1:
                return self[0].value == other
            untagged = self.untagged()
            if len(untagged) == 1:
                return untagged[0].value == other
            return False
        return super().__eq__(other)

    def __ne__(self, other: object) -> bool:
        """Return the negation of ``__eq__``.

        ``list`` ships its own ``__ne__`` which knows nothing about the
        string comparison above; without this override ``ls == "x"`` and
        ``ls != "x"`` could both be ``True``.
        """
        outcome = self.__eq__(other)
        if outcome is NotImplemented:
            return NotImplemented
        return not outcome

    # Lists are unhashable; keep that behaviour. Assigning ``None`` (instead
    # of defining a method that raises) also makes
    # ``isinstance(obj, collections.abc.Hashable)`` report ``False``.
    __hash__ = None

    # -- query helpers ------------------------------------------------------

    def count_by_lang(self, lang: str | None = None) -> int:
        """Return the number of entries for a given language tag.

        Parameters
        ----------
        lang : str | None
            The language tag to count (e.g. ``"en"``). Use ``None`` or
            ``""`` for untagged (plain) strings.

        Returns
        -------
        int
            Number of entries matching the language.
        """
        lang = self._norm_lang(lang)
        return sum(1 for ls in self if ls.lang == lang)

    def has_language(self, lang: str | None) -> bool:
        """Return ``True`` if at least one entry has the given language tag.

        Parameters
        ----------
        lang : str | None
            Language tag to check. Use ``None`` or ``""`` for untagged entries.
        """
        lang = self._norm_lang(lang)
        return any(ls.lang == lang for ls in self)

    def has_untagged(self) -> bool:
        """Return ``True`` if at least one entry has no language tag (``lang=None``)."""
        return self.has_language(None)

    def get_by_language(self, lang: str | None = None) -> LangStringList:
        """Return entries matching the given language tag.

        Parameters
        ----------
        lang : str | None
            Language tag to filter by. Use ``None`` or ``""`` for untagged entries.

        Returns
        -------
        LangStringList
            A new ``LangStringList`` containing only matching entries.
        """
        lang = self._norm_lang(lang)
        return LangStringList(ls for ls in self if ls.lang == lang)

    def has_synonyms(self, lang: str | None = None) -> bool:
        """Return ``True`` if the specified language has more than one entry.

        Parameters
        ----------
        lang : str | None
            Language tag to check. If ``None`` or ``""``, checks untagged entries.
        """
        return self.count_by_lang(lang) > 1

    def languages(self) -> set[str | None]:
        """Return the set of distinct language tags (including ``None`` for untagged)."""
        return {ls.lang for ls in self}
def _coerce_to_lang_string_list(
    value: LocalizedStrInput | list[LangString] | LangStringList,
) -> LangStringList:
    """Build a deduplicated ``LangStringList`` from any accepted input shape.

    Accepted inputs:

    * ``str`` – becomes a single untagged ``LangString``
    * ``LangString`` – wrapped in a ``LangStringList``
    * ``dict[str, str | list[str]]`` – keys are language tags (``""`` means
      ``lang=None``), each value contributes one or more entries
    * ``list`` of any of the above (nested dicts included) – flattened
    * an existing ``LangStringList`` or a list holding only ``LangString``
      objects – taken as-is, without re-normalising the items

    In every case repeated ``(value, lang)`` pairs are dropped, keeping the
    first occurrence and the original order, and a new ``LangStringList`` is
    returned.
    """
    entries: list[LangString]
    already_lang_strings = isinstance(value, LangStringList) or (
        isinstance(value, list) and all(isinstance(entry, LangString) for entry in value)
    )
    if already_lang_strings:
        # Nothing to coerce; only deduplication is needed.
        entries = cast(list[LangString], value)
    else:
        entries = []
        _normalise_into(value, entries)
    return LangStringList(_deduplicate_lang_strings(entries))
# ``LocalizedStr`` is the annotation models use for localized text fields. It is
# defined twice on purpose: once for static type checkers, once for runtime.
if TYPE_CHECKING:
    # Mypy sees the wide input union so that ``Model(field="plain")`` type-checks.
    LocalizedStr = LocalizedStrInput | LangStringList
else:
    # At runtime Pydantic uses the BeforeValidator to coerce inputs → LangStringList.
    LocalizedStr = Annotated[LangStringList, BeforeValidator(_coerce_to_lang_string_list)]
[docs]
@dataclass(frozen=True)
class RdfProperty:
    """Metadata descriptor for mapping Pydantic fields to RDF predicates.

    Place an instance inside ``Annotated[...]`` on a model field to state
    which RDF predicate the field maps to and, optionally, which datatype or
    language tag its literals carry and how values are converted.

    Parameters
    ----------
    predicate : str | URIRef
        The RDF predicate URI for this property, e.g. ``FOAF.name``. This is
        the only required argument.
    datatype : str | URIRef | None, optional
        Datatype URI for literal values, e.g. ``XSD.integer``. Default is
        ``None``.
    language : str | None, optional
        Language tag for string literals (e.g. ``"en"``). Default is ``None``.
    serializer : Callable | None, optional
        Custom function applied to a Python value before it is written to
        RDF. Signature: ``(value: Any) -> Any``. Default is ``None``.
    parser : Callable | None, optional
        Custom function applied to an RDF node when reading it back.
        Signature: ``(node: URIRef | Literal) -> Any``. Default is ``None``.

    Examples
    --------
    Basic property mapping::

        from rdflib import FOAF
        from typing import Annotated, Optional, List

        name: Annotated[Optional[List[str]], RdfProperty(FOAF.name)] = None

    With datatype::

        from rdflib import XSD

        age: Annotated[Optional[int], RdfProperty(
            FOAF.age,
            datatype=XSD.integer
        )] = None

    With language tag::

        description: Annotated[Optional[List[str]], RdfProperty(
            DCTERMS.description,
            language="en"
        )] = None

    With custom serializer/parser::

        from datetime import date

        def serialize_date(d: date) -> str:
            return d.isoformat()

        def parse_date(node) -> date:
            return date.fromisoformat(str(node))

        birth_date: Annotated[Optional[date], RdfProperty(
            SCHEMA.birthDate,
            serializer=serialize_date,
            parser=parse_date
        )] = None

    Notes
    -----
    - Instances are immutable (frozen dataclass) and compare by field values.
    - ``language`` and ``datatype`` are intended to be mutually exclusive;
      this class does not itself enforce that.
    - No validation happens at construction time; ``predicate`` is only
      converted when ``predicate_uri()`` is called.

    See Also
    --------
    RdfBaseModel : Base class for RDF-enabled Pydantic models
    """

    predicate: str | URIRef
    datatype: str | URIRef | None = None
    language: str | None = None
    serializer: Any | None = None
    parser: Any | None = None

    def predicate_uri(self) -> URIRef:
        """Convert the predicate to an rdflib URIRef.

        Returns
        -------
        URIRef
            The predicate as an rdflib URIRef.

        Raises
        ------
        ValueError
            If the predicate cannot be converted (``_ensure_uri`` returned
            ``None``).

        Examples
        --------
        >>> from rdflib import FOAF
        >>> prop = RdfProperty(FOAF.name)
        >>> prop.predicate_uri()
        rdflib.term.URIRef('http://xmlns.com/foaf/0.1/name')
        """
        uri = _ensure_uri(self.predicate)
        if uri is None:
            # Explicit check instead of ``assert``: assertions are removed
            # under ``python -O``, which would let ``None`` escape as a
            # predicate.
            raise ValueError(f"RdfProperty predicate {self.predicate!r} could not be converted to a URIRef")
        return uri

    def datatype_uri(self) -> URIRef | None:
        """Convert the datatype to an rdflib URIRef.

        Returns
        -------
        URIRef | None
            The datatype as an rdflib URIRef, or None if no datatype is specified.

        Examples
        --------
        >>> from rdflib import XSD
        >>> prop = RdfProperty(FOAF.age, datatype=XSD.integer)
        >>> prop.datatype_uri()
        rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')
        """
        return _ensure_uri(self.datatype)
[docs]
@runtime_checkable
class RdfUriGenerator(Protocol):
    """Protocol for objects that generate an RDF subject URI from a model instance.

    Any callable with the matching signature — including plain functions and
    lambdas — satisfies this protocol, so existing ``rdf_uri_generator``
    callables require no changes.

    Because the protocol is ``runtime_checkable``, ``isinstance(obj,
    RdfUriGenerator)`` works, but such a check only verifies that ``__call__``
    exists; the parameter list and return type are not inspected at runtime.

    Parameters
    ----------
    model : RdfBaseModel
        The model instance being serialised.
    base_uri : str | None, optional
        Base URI hint, forwarded from ``to_rdf_graph``. Keyword-only.

    Returns
    -------
    URIRef | BNode
        The subject node to use for the resource.

    Examples
    --------
    Using a plain function::

        def my_generator(model: RdfBaseModel, *, base_uri: str | None = None) -> URIRef | BNode:
            return EX[type(model).__name__ + "/" + str(model.id)]

        person = Person(id="alice", rdf_uri_generator=my_generator)

    Using a class-based generator::

        class PrefixedGenerator:
            def __init__(self, prefix: str) -> None:
                self.prefix = prefix

            def __call__(self, model: RdfBaseModel, *, base_uri: str | None = None) -> URIRef | BNode:
                return URIRef(self.prefix + str(model.id))
    """

    # Signature only: implementations supply the body.
    def __call__(
        self,
        model: RdfBaseModel,
        *,
        base_uri: str | None = None,
    ) -> URIRef | BNode: ...
[docs]
class DefaultUriGenerator:
    """Default RDF subject URI generator.

    Implements the URI resolution strategy :class:`RdfBaseModel` uses out of
    the box:

    1. When the model names an ``rdf_id_field`` and that attribute holds a
       non-empty value, the subject is derived from its string form:

       * a value that already looks like a URI is used verbatim;
       * otherwise the model's namespace string, if any, is prepended;
       * otherwise the ``base_uri`` passed to the serialiser, if any, is
         normalised and prepended;
       * otherwise the bare string becomes the URI.

    2. With no usable identifier and ``auto_uuid`` set, a fresh UUID4 URI is
       minted: under the model's namespace when there is one, else as
       ``urn:uuid:<uuid4>``. ``base_uri`` is not consulted in this case.
    3. With no usable identifier and ``auto_uuid`` unset, a new
       :class:`rdflib.BNode` is returned.

    Parameters
    ----------
    auto_uuid : bool
        Whether to generate a UUID URI when no explicit identifier is present.
        Default is ``True``.

    Why ``auto_uuid=True`` is the default
    --------------------------------------
    Strictly speaking, an RDF resource without a stable global identifier
    would be a blank node: anonymous and scoped to a single graph. The
    default favours developer experience instead:

    * **Graph portability** — UUID URIs survive serialisation and can be
      referenced from other graphs; blank nodes cannot.
    * **Predictable round-trips** — a UUID URI can be read back as the
      subject, whereas blank node identifiers are opaque and may change
      between parse/serialise cycles.
    * **Merge safety** — UUID URIs are globally unique, while merging graphs
      that both contain blank nodes can silently collapse unrelated
      resources.

    Pass ``auto_uuid=False`` when anonymous resources are really wanted (for
    instance reified statements or inline blank-node structures) and external
    references to them are not needed.

    See Also
    --------
    TemplateUriGenerator : URI from a pattern string.
    HashUriGenerator : Deterministic URI from field content.
    CompositeUriGenerator : Priority-ordered fallback chain.

    Examples
    --------
    Default usage (auto UUID enabled)::

        person = Person(rdf_uri_generator=DefaultUriGenerator())

    Disable UUID fallback (produces BNodes instead)::

        person = Person(rdf_uri_generator=DefaultUriGenerator(auto_uuid=False))
    """

    def __init__(self, *, auto_uuid: bool = True) -> None:
        self.auto_uuid = auto_uuid

    def __call__(
        self,
        model: RdfBaseModel,
        *,
        base_uri: str | None = None,
    ) -> URIRef | BNode:
        """Generate the subject URI for *model*."""
        id_field = model.rdf_id_field
        raw_id = getattr(model, id_field, None) if id_field else None
        local_name = "" if raw_id is None else str(raw_id)

        if not local_name:
            # No usable identifier: mint a UUID or fall back to a blank node.
            if not self.auto_uuid:
                return BNode()
            ns = model._namespace_string()
            return URIRef(ns + str(uuid.uuid4())) if ns else URIRef(f"urn:uuid:{uuid.uuid4()}")

        if _looks_like_uri(local_name):
            return URIRef(local_name)

        # Relative identifier: the namespace wins over base_uri.
        ns = model._namespace_string()
        if ns:
            return URIRef(ns + local_name)
        if base_uri:
            return URIRef(_normalise_base(base_uri) + local_name)
        return URIRef(local_name)
[docs]
class RdfBaseModel(BaseModel):
"""Base class for Pydantic models with RDF serialization capabilities.
This class extends Pydantic's BaseModel to provide automatic conversion to and
from RDF graphs. Models inheriting from RdfBaseModel can be serialized to various
RDF formats (Turtle, RDF/XML, JSON-LD, etc.) and deserialized back to Python objects.
Class Attributes
----------------
rdf_type : str | URIRef | None
The RDF type (rdf:type) for instances of this class. Typically set to a
vocabulary class URI like `FOAF.Person` or `SKOS.Concept`. If None, no
rdf:type triple is added to the graph. Note: `None` is valid only for
base or abstract models; concrete vocabulary classes should explicitly
define an `rdf_type`.
rdf_namespace : str | Namespace | None
The default namespace for generating subject URIs. Used when an instance
has an `id` but not a full URI. For example, with namespace `FOAF` and
id `"john"`, the subject becomes `<http://xmlns.com/foaf/0.1/john>`.
rdf_id_field : str | None
The name of the field to use for the RDF subject identifier. Defaults to
`"id"`. Set to None to disable ID field mapping and always use UUIDs.
rdf_prefixes : Dict[str, str | Namespace]
Namespace prefix bindings for RDF serialization. Used to create readable
output with prefixes like `foaf:name` instead of full URIs. Automatically
includes 'rdf' and 'xsd' prefixes.
Instance Attributes
-------------------
id : Any, optional
If `rdf_id_field` is "id" (default), this field contains the subject
identifier. Can be a short string (combined with namespace) or a full URI.
Methods
-------
to_rdf_graph(graph=None, *, base_uri=None) -> Graph
Serialize this model instance into an rdflib Graph.
to_rdf(format="turtle", *, base_uri=None, **kwargs) -> str
Serialize this model instance to an RDF string in the specified format.
from_rdf_graph(graph, subject, *, base_uri=None) -> RdfBaseModel
Class method to deserialize a model from an RDF graph.
from_rdf(data, *, format="turtle", subject=None, base_uri=None) -> RdfBaseModel
Class method to deserialize a model from an RDF string or bytes.
Configuration
-------------
The model_config allows arbitrary types (URIRef, Literal, etc.) in fields.
Examples
--------
Basic model definition::
from rdflib import Namespace, FOAF
from typing import Annotated, Optional, List
class Person(RdfBaseModel):
rdf_type: str = str(FOAF.Person)
rdf_namespace = FOAF
rdf_prefixes = {"foaf": FOAF}
name: Annotated[Optional[List[str]], RdfProperty(FOAF.name)] = None
email: Annotated[Optional[List[str]], RdfProperty(FOAF.mbox)] = None
Creating and serializing::
person = Person(name=["Alice Smith"], email=["alice@example.org"])
turtle_output = person.to_rdf("turtle")
# Output includes proper @prefix declarations and triples
Deserializing::
restored = Person.from_rdf(turtle_output, format="turtle")
assert restored.name == ["Alice Smith"]
With custom ID::
person = Person(id="alice", name=["Alice Smith"])
# Subject URI becomes: <http://xmlns.com/foaf/0.1/alice>
With full URI as ID::
person = Person(id="http://example.org/people/alice", name=["Alice"])
# Subject URI is: <http://example.org/people/alice>
Nested objects::
class Organization(RdfBaseModel):
rdf_type: str = str(FOAF.Organization)
name: Annotated[Optional[List[str]], RdfProperty(FOAF.name)] = None
class Person(RdfBaseModel):
rdf_type: str = str(FOAF.Person)
name: Annotated[Optional[List[str]], RdfProperty(FOAF.name)] = None
org: Annotated[Optional[List[Organization]], RdfProperty(FOAF.member)] = None
person = Person(
name=["Alice"],
org=[Organization(name=["ACME Corp"])]
)
# Both person and organization are serialized to the graph
Notes
-----
- All fields mapped to RDF should use `Annotated[..., RdfProperty(...)]`
- Multi-valued properties use `Optional[List[T]]` (standard in RDF)
- The `id` field is optional; if not provided, a UUID is generated
- Nested RdfBaseModel instances are automatically serialized
- Round-trip serialization is lossless for supported types
- Custom serializers/parsers can handle complex types
See Also
--------
RdfProperty : Metadata for field-to-predicate mapping
"""
model_config = ConfigDict(arbitrary_types_allowed=True)
rdf_type: ClassVar[str | URIRef | None] = None
rdf_namespace: ClassVar[str | Namespace | None] = None
rdf_id_field: ClassVar[str | None] = "id"
rdf_prefixes: ClassVar[dict[str, str | Namespace]] = {}
rdf_uri_generator: RdfUriGenerator = Field(default_factory=DefaultUriGenerator, exclude=True)
[docs]
def to_rdf_graph(
    self,
    graph: Graph | None = None,
    *,
    base_uri: str | None = None,
    rdf_uri_generator: RdfUriGenerator | None = None,
) -> Graph:
    """Write this model instance into an rdflib Graph and return the graph.

    The actual triple generation is delegated to ``_serialise_into_graph``;
    this method only decides which graph receives the triples.

    Parameters
    ----------
    graph : Graph | None, optional
        Graph to add the triples to. When ``None`` (the default) a new,
        empty ``Graph`` is created. A graph that is passed in is mutated in
        place and is the very object that gets returned.
    base_uri : str | None, optional
        Base URI hint used when a subject URI has to be built from a
        relative identifier. Forwarded unchanged. Default is ``None``.
    rdf_uri_generator : RdfUriGenerator | None, optional
        Callable producing the subject node (``URIRef`` or ``BNode``) for a
        model instance. Forwarded unchanged; it is meant to take precedence
        over the instance's own ``rdf_uri_generator``. Default is ``None``.

    Returns
    -------
    Graph
        The graph now holding this instance's triples.

    Examples
    --------
    Basic serialization::

        person = Person(name=["Alice"])
        graph = person.to_rdf_graph()

    Collecting several resources in one graph::

        graph = Graph()
        Person(name=["Alice"]).to_rdf_graph(graph)
        Person(name=["Bob"]).to_rdf_graph(graph)

    With base URI::

        person = Person(id="alice", name=["Alice"])
        graph = person.to_rdf_graph(base_uri="http://example.org/people/")

    See Also
    --------
    to_rdf : Serialize directly to a string format
    from_rdf_graph : Deserialize from a Graph
    """
    # Compare against None explicitly: an empty Graph is falsy but must be reused.
    target = Graph() if graph is None else graph
    self._serialise_into_graph(target, base_uri=base_uri, rdf_uri_generator=rdf_uri_generator)
    return target
[docs]
def to_rdf(
    self,
    format: str = "turtle",
    *,
    base_uri: str | None = None,
    rdf_uri_generator: RdfUriGenerator | None = None,
    **kwargs: Any,
) -> str:
    """Serialize this model instance to RDF text.

    Convenience wrapper: builds a fresh graph with ``to_rdf_graph`` and
    returns the result of calling that graph's ``serialize`` method.

    Parameters
    ----------
    format : str, optional
        Name of the rdflib serializer to use. Commonly used names:

        - ``"turtle"`` (default)
        - ``"xml"`` or ``"pretty-xml"`` (RDF/XML)
        - ``"json-ld"``
        - ``"nt"`` or ``"ntriples"`` (N-Triples)
        - ``"n3"`` (Notation3)
    base_uri : str | None, optional
        Base URI hint for building subject URIs; forwarded to
        ``to_rdf_graph``. Default is ``None``.
    rdf_uri_generator : RdfUriGenerator | None, optional
        Callable producing the subject node for a model instance; forwarded
        to ``to_rdf_graph``. Default is ``None``.
    **kwargs : Any
        Extra keyword arguments handed verbatim to rdflib's ``serialize()``.
        NOTE(review): options such as ``encoding`` or ``destination`` change
        what rdflib returns, so the ``str`` return annotation only holds when
        they are not passed. Confirm against the rdflib version in use.

    Returns
    -------
    str
        The serialized RDF.

    Examples
    --------
    Turtle format (default)::

        person = Person(id="alice", name=["Alice Smith"])
        print(person.to_rdf("turtle"))

    Other formats::

        xml = person.to_rdf("xml")
        jsonld = person.to_rdf("json-ld")
        ntriples = person.to_rdf("ntriples")

    See Also
    --------
    to_rdf_graph : Get the Graph object directly
    from_rdf : Deserialize from an RDF string
    """
    populated = self.to_rdf_graph(base_uri=base_uri, rdf_uri_generator=rdf_uri_generator)
    serialised = populated.serialize(format=format, **kwargs)
    return serialised  # type: ignore[no-any-return]
[docs]
@classmethod
def from_rdf_graph(
    cls: type[T],
    graph: Graph,
    subject: URIRef | BNode | str,
    *,
    base_uri: str | None = None,
) -> T:
    """Build a model instance from the triples about *subject* in *graph*.

    For every field annotated with :class:`RdfProperty`, the objects of
    ``(subject, predicate, ?)`` are read from the graph, converted to Python
    values and handed to the model constructor.

    Parameters
    ----------
    graph : Graph
        The rdflib Graph containing the RDF data.
    subject : URIRef | BNode | str
        The resource to deserialize. Strings are converted to ``URIRef``;
        ``BNode`` subjects are used as-is so that blank-node resources
        (including nested ones) can be looked up in the graph.
    base_uri : str | None, optional
        Base URI used when deriving the identifier field from the subject;
        it is also forwarded to nested models. Default is None.

    Returns
    -------
    T
        A new instance of ``cls`` populated with data from the graph.

    Raises
    ------
    ValueError
        If *subject* is None.
    ValidationError
        Raised by the model constructor when the collected values do not
        pass validation.

    Examples
    --------
    Basic deserialization::

        graph = Graph()
        graph.parse(data=turtle_data, format="turtle")
        person = Person.from_rdf_graph(
            graph,
            URIRef("http://example.org/people/alice"),
        )

    Notes
    -----
    - Fields without matching triples are left out of the constructor call,
      so their declared defaults apply.
    - List fields receive a list, except that a field accepting both a
      scalar and a list receives the bare value when exactly one object is
      found.
    - Non-list fields receive the first object returned by the graph.
    - ``URIRef``/``BNode`` objects of a field whose type includes an
      ``RdfBaseModel`` subclass are deserialized recursively.
    - ``LocalizedStr`` fields receive a list of ``LangString``.
    - If ``rdf_id_field`` is set and not populated from a property, it is
      derived from the subject.

    See Also
    --------
    from_rdf : Deserialize from an RDF string
    to_rdf_graph : Serialize to a Graph
    """
    # rdflib terms compare by type as well as by value, so a blank node
    # coerced to URIRef would match no triples at all. Keep BNodes intact.
    subject_node: URIRef | BNode | None
    if isinstance(subject, BNode):
        subject_node = subject
    else:
        subject_node = _ensure_uri(subject)
    if subject_node is None:
        msg = "Subject URI cannot be None"
        raise ValueError(msg)

    values: dict[str, Any] = {}
    for name, field in cls.model_fields.items():
        prop = _get_rdf_property(field)
        if prop is None:
            # Not an RDF-mapped field.
            continue

        predicate = prop.predicate_uri()
        is_list, accepts_scalar, inner_type = _field_type_info(field)
        objects = list(graph.objects(subject_node, predicate))
        if not objects:
            continue

        if _is_localized_str_field(field):
            # Produce list[LangString] directly; the validator attached to
            # LocalizedStr is expected to take care of deduplication.
            lang_items: list[LangString] = []
            for obj in objects:
                if isinstance(obj, Literal):
                    lang_items.append(LangString(value=str(obj), lang=obj.language))
                else:
                    lang_items.append(LangString(value=str(obj), lang=None))
            values[name] = lang_items
            continue

        model_type = _get_rdf_model_type(inner_type)
        items: list[Any] = []
        for obj in objects:
            if model_type and isinstance(obj, (URIRef, BNode)):
                # Resource reference to a nested model: recurse.
                items.append(model_type.from_rdf_graph(graph, obj, base_uri=base_uri))
            else:
                items.append(_node_to_python(obj, inner_type, prop))

        if not is_list:
            values[name] = items[0]
        elif accepts_scalar and len(items) == 1:
            # Field accepts scalar-or-list: a single hit comes back bare.
            values[name] = items[0]
        else:
            values[name] = items

    id_field = cls.rdf_id_field
    if id_field and id_field not in values:
        # The helper only stringifies its argument, so the cast is purely
        # for the type checker when the subject is a blank node.
        identifier = cls._identifier_from_subject(cast(URIRef, subject_node), base_uri=base_uri)
        if identifier is not None:
            values[id_field] = identifier

    return cls(**values)
[docs]
@classmethod
def from_rdf(
    cls: type[T],
    data: str | bytes,
    format: str = "turtle",
    *,
    subject: URIRef | BNode | str | None = None,
    base_uri: str | None = None,
) -> T:
    """Parse an RDF document and build a model instance from it.

    The document is loaded into a fresh rdflib ``Graph``. When no subject
    is supplied, the class tries to infer one from the graph; the actual
    construction is then delegated to :meth:`from_rdf_graph`.

    Parameters
    ----------
    data : str | bytes
        The RDF document, passed to ``Graph.parse`` as ``data``.
    format : str, optional
        Parser name passed to ``Graph.parse`` unchanged, e.g. ``"turtle"``
        (default), ``"xml"``, ``"json-ld"``, ``"nt"`` or ``"n3"``.
    subject : URIRef | BNode | str | None, optional
        The resource to deserialize. If None, the subject is inferred from
        the graph. Default is None.
    base_uri : str | None, optional
        Base URI forwarded to :meth:`from_rdf_graph`. Default is None.

    Returns
    -------
    T
        A new instance of ``cls`` populated with the RDF data.

    Raises
    ------
    ValueError
        If no subject is given and none can be inferred, or if inference
        finds several candidate resources.
    ValidationError
        If the deserialized data does not pass model validation.

    Examples
    --------
    ::

        person = Person.from_rdf(turtle_data)
        person = Person.from_rdf(
            turtle_data,
            format="turtle",
            subject="http://example.org/people/alice",
        )

    See Also
    --------
    from_rdf_graph : Deserialize from a Graph object
    to_rdf : Serialize to an RDF string
    """
    parsed = Graph()
    parsed.parse(data=data, format=format)

    # An explicit subject wins; inference is only the fallback.
    target = cls._infer_subject(parsed) if subject is None else subject
    if target is None:
        raise ValueError("Unable to determine subject for RDF document; provide the subject explicitly.")
    return cls.from_rdf_graph(parsed, target, base_uri=base_uri)
def _serialise_into_graph(
    self,
    graph: Graph,
    *,
    base_uri: str | None = None,
    rdf_uri_generator: RdfUriGenerator | None = None,
) -> URIRef | BNode:
    """Write this model's triples into *graph* and return its subject node.

    Emits an ``rdf:type`` triple when the model declares an ``rdf_type``,
    then one triple per value of every field annotated with
    :class:`RdfProperty`. Fields whose value is None are skipped.

    Parameters
    ----------
    graph : Graph
        The rdflib Graph that receives the triples.
    base_uri : str | None, optional
        Base URI for subject generation.
    rdf_uri_generator : RdfUriGenerator | None, optional
        A custom function to generate subject URIs for model instances.

    Returns
    -------
    URIRef | BNode
        The subject node of the serialized resource.
    """
    own_node = self._subject_uri(base_uri=base_uri, rdf_uri_generator=rdf_uri_generator)
    self._bind_prefixes(graph)

    type_uri = _ensure_uri(self.rdf_type)
    if type_uri is not None:
        graph.add((own_node, RDF.type, type_uri))

    for field_name, field_info in self.__class__.model_fields.items():
        rdf_prop = _get_rdf_property(field_info)
        if rdf_prop is None:
            continue
        field_value = getattr(self, field_name)
        if field_value is None:
            continue
        predicate = rdf_prop.predicate_uri()

        if isinstance(field_value, LangStringList):
            # Language-tagged strings map one-to-one onto literals.
            for entry in field_value:
                graph.add((own_node, predicate, Literal(entry.value, lang=entry.lang)))
            continue

        is_list, _accepts_scalar, element_type = _field_type_info(field_info)
        # A list-capable field may still hold a bare scalar; wrap it so the
        # loop below treats both shapes alike.
        if is_list and isinstance(field_value, list):
            elements = field_value
        else:
            elements = [field_value]

        for element in elements:
            if element is None:
                continue
            rdf_node = self._value_to_node(
                element,
                element_type,
                rdf_prop,
                graph,
                base_uri,
                rdf_uri_generator=rdf_uri_generator,
            )
            graph.add((own_node, predicate, rdf_node))

    return own_node
@classmethod
def _identifier_from_subject(cls, subject: URIRef, *, base_uri: str | None = None) -> str | None:
    """Derive a short identifier from a subject URI.

    The class namespace is tried first, then the (normalised) base URI; the
    first one that prefixes the subject is stripped off.

    Parameters
    ----------
    subject : URIRef
        The subject URI to convert.
    base_uri : str | None, optional
        Base URI to strip from the subject.

    Returns
    -------
    str | None
        The subject with the matching prefix removed, or the full subject
        string when neither prefix matches.
    """
    full = str(subject)

    class_ns = cls._namespace_string()
    if class_ns and full.startswith(class_ns):
        return full.removeprefix(class_ns)

    if not base_uri:
        return full

    # removeprefix leaves the string untouched when the base does not match.
    return full.removeprefix(_normalise_base(base_uri))
@classmethod
def _namespace_string(cls) -> str | None:
    """Return the class-level ``rdf_namespace`` as a plain string.

    Returns
    -------
    str | None
        ``str(rdf_namespace)``, or None if no namespace is set.
    """
    configured = cls.rdf_namespace
    return None if configured is None else str(configured)
def _subject_uri(
self,
*,
base_uri: str | None = None,
rdf_uri_generator: RdfUriGenerator | None = None,
) -> URIRef | BNode:
"""Generate the subject URI for this instance.
Delegates entirely to the active :class:`RdfUriGenerator`. The
*rdf_uri_generator* argument, when provided, overrides the instance's
own generator for this single call (used by ``to_rdf_graph`` and
``to_rdf``).
Parameters
----------
base_uri : str | None, optional
Base URI forwarded to the generator.
rdf_uri_generator : RdfUriGenerator | None, optional
Call-site override generator; falls back to ``self.rdf_uri_generator``.
Returns
-------
URIRef | BNode
The subject URI or Blank Node for this resource.
"""
gen = rdf_uri_generator if rdf_uri_generator is not None else self.rdf_uri_generator
return gen(self, base_uri=base_uri)
def _bind_prefixes(self, graph: Graph) -> None:
    """Register namespace prefixes on *graph*.

    The default prefixes are bound first; entries from ``self.rdf_prefixes``
    override defaults that share the same prefix.

    Parameters
    ----------
    graph : Graph
        The graph to bind prefixes to.
    """
    model_prefixes = {prefix: str(ns) for prefix, ns in self.rdf_prefixes.items()}
    merged = {**_default_prefixes(), **model_prefixes}
    for prefix, namespace in merged.items():
        graph.bind(prefix, namespace)
def _value_to_node(
    self,
    value: Any,
    expected_type: Any,
    prop: RdfProperty,
    graph: Graph,
    base_uri: str | None,
    *,
    rdf_uri_generator: RdfUriGenerator | None = None,
) -> URIRef | BNode | Literal:
    """Turn one Python value into an RDF node.

    The checks run in a fixed order: custom serializer, nested model,
    ready-made rdflib terms, enum unwrapping, bytes, ``LangString``,
    typed scalars, strings, and finally a plain ``Literal`` fallback.

    Parameters
    ----------
    value : Any
        The Python value to convert.
    expected_type : Any
        The expected type from the field annotation.
    prop : RdfProperty
        The RDF property metadata.
    graph : Graph
        The graph that receives triples of nested models.
    base_uri : str | None
        Base URI for nested objects.
    rdf_uri_generator : RdfUriGenerator | None, optional
        A custom function to generate subject URIs for model instances.

    Returns
    -------
    URIRef | BNode | Literal
        The RDF node representation of the value.
    """
    if prop.serializer is not None:
        value = prop.serializer(value)

    if isinstance(value, RdfBaseModel):
        # Nested model: emit its triples and link to its subject node.
        return value._serialise_into_graph(graph, base_uri=base_uri, rdf_uri_generator=rdf_uri_generator)

    if isinstance(value, (URIRef, Literal)):
        # Already an rdflib term; use it verbatim.
        return value

    if isinstance(value, Enum):
        # Continue with the enum's underlying value.
        value = value.value

    if isinstance(value, bytes):
        import base64

        return Literal(base64.b64encode(value).decode("ascii"), datatype=XSD.base64Binary)

    if isinstance(value, LangString):
        return Literal(value.value, lang=value.lang)

    if isinstance(value, (datetime, date, time, int, float, bool, Decimal, uuid.UUID)):
        explicit = prop.datatype_uri()
        chosen = _python_datatype(value) if explicit is None else explicit
        return Literal(value, datatype=chosen)

    if not isinstance(value, str):
        return Literal(value)

    declared_datatype = prop.datatype_uri()
    if prop.language:
        # A language tag takes precedence over a declared datatype.
        return Literal(value, lang=prop.language)
    if declared_datatype is not None:
        return Literal(value, datatype=declared_datatype)

    # A plain string becomes a URIRef only if the annotation allows URIRef
    # and the text starts with a URI scheme.
    origin = get_origin(expected_type)
    if origin is Union or (hasattr(types, "UnionType") and origin is types.UnionType):
        candidates = get_args(expected_type)
    else:
        candidates = (expected_type,)
    if URIRef in candidates and _looks_like_uri(value):
        return URIRef(value)
    return Literal(value)
@classmethod
def _infer_subject(cls, graph: Graph) -> URIRef | BNode | None:
    """Guess which subject in *graph* represents this model.

    When the class declares an ``rdf_type``, only subjects carrying that
    ``rdf:type`` are considered; otherwise every subject in the graph is a
    candidate. Exactly one candidate must remain.

    Parameters
    ----------
    graph : Graph
        The graph to analyze.

    Returns
    -------
    URIRef | BNode | None
        The single candidate subject, or None if there is no candidate.

    Raises
    ------
    ValueError
        If more than one candidate subject is found.
    """
    type_uri = _ensure_uri(cls.rdf_type)
    if type_uri is None:
        candidates = _unique(graph.subjects())
        ambiguity = "Multiple resources found in graph; provide the subject explicitly."
    else:
        candidates = _unique(graph.subjects(RDF.type, type_uri))
        ambiguity = "Multiple resources of the requested rdf:type were found; provide the subject explicitly."

    if not candidates:
        return None
    if len(candidates) > 1:
        raise ValueError(ambiguity)
    return cast(URIRef | BNode, candidates[0])
def _get_rdf_property(field: Any) -> RdfProperty | None:
"""Extract RdfProperty metadata from a field's metadata or annotation.
Parameters
----------
field : Any
A Pydantic field information object.
Returns
-------
RdfProperty | None
The RdfProperty if found in metadata, otherwise None.
"""
metadata = getattr(field, "metadata", ()) or ()
for item in metadata:
if isinstance(item, RdfProperty):
return item
annotation = getattr(field, "annotation", None)
if annotation is not None:
for item in _annotation_metadata(annotation):
if isinstance(item, RdfProperty):
return item
return None
def _field_type_info(field: Any) -> tuple[bool, bool, Any]:
    """Classify a field as list/scalar and extract its element type.

    ``Annotated`` wrappers and ``Optional`` (``T | None``) are unwrapped
    before the annotation is inspected.

    Parameters
    ----------
    field : Any
        A Pydantic field information object.

    Returns
    -------
    tuple[bool, bool, Any]
        A tuple of (is_list, accepts_scalar, inner_type).

        - is_list is True if the field accepts list values.
        - accepts_scalar is True if the field also accepts a single
          (non-list) value, e.g. ``str | list[str] | None``.
        - inner_type is the type of individual elements; ``Any`` for an
          unparameterised list.
    """
    annotation = _unwrap_annotation(getattr(field, "annotation", Any))
    origin = get_origin(annotation)

    # Handles Union[T, None], T | None, and unions that include a list.
    if origin is Union or origin is types.UnionType:
        args = get_args(annotation)
        has_list = False
        list_item_type: Any = Any
        scalar_args: list[Any] = []
        for arg in args:
            arg_unwrapped = _unwrap_annotation(arg)
            if get_origin(arg_unwrapped) is list:
                has_list = True
                list_args = get_args(arg_unwrapped)
                list_item_type = _unwrap_annotation(list_args[0]) if list_args else Any
            elif arg is not type(None):
                scalar_args.append(arg)

        if has_list:
            # Any non-None, non-list member means scalars are accepted too
            # (e.g. ``str | list[str] | None``).
            return True, bool(scalar_args), list_item_type

        # Plain Optional[T]: continue with T.
        non_none_args = [arg for arg in args if arg is not type(None)]
        if len(non_none_args) == 1:
            annotation = _unwrap_annotation(non_none_args[0])
            origin = get_origin(annotation)

    if origin is list:
        # Bare ``typing.List`` has origin ``list`` but no type arguments;
        # indexing its args unconditionally would raise IndexError.
        list_args = get_args(annotation)
        item_type = _unwrap_annotation(list_args[0]) if list_args else Any
        return True, False, item_type

    # LangStringList is a concrete class, so it has no typing origin.
    # Requiring ``origin is None`` also keeps parameterized generics such as
    # ``dict[str, int]`` away from issubclass(), which rejects them with
    # TypeError on interpreters where they pass the isinstance(..., type) test.
    if origin is None and isinstance(annotation, type) and issubclass(annotation, LangStringList):
        return True, False, LangString

    return False, False, annotation
def _unwrap_annotation(annotation: Any) -> Any:
"""Unwrap Annotated type to get the actual type.
Recursively unwraps until reaching a non-Annotated type.
Parameters
----------
annotation : Any
A potentially Annotated type hint.
Returns
-------
Any
The unwrapped type, or the original if not Annotated.
"""
while True:
origin = get_origin(annotation)
if origin is None:
return annotation
if origin is Annotated:
annotation = get_args(annotation)[0]
continue
return annotation
def _annotation_metadata(annotation: Any) -> tuple[Any, ...]:
"""Extract metadata from an Annotated type.
Parameters
----------
annotation : Any
A type annotation, possibly Annotated.
Returns
-------
tuple[Any, ...]
The metadata items if Annotated, otherwise empty tuple.
"""
if get_origin(annotation) is Annotated:
args = get_args(annotation)
return tuple(args[1:])
return ()
def _node_to_python(node: Any, expected_type: Any, prop: RdfProperty) -> Any:
"""Convert an RDF node to a Python value.
Handles deserialization of URIRef and Literal nodes to appropriate Python
types based on field type hints and RdfProperty configuration.
Parameters
----------
node : Any
The RDF node to convert (URIRef or Literal).
expected_type : Any
The expected Python type from field annotations.
prop : RdfProperty
The RDF property metadata.
Returns
-------
Any
The converted Python value.
Raises
------
TypeError
If a nested RDF model is encountered (should be handled separately).
"""
if prop.parser is not None:
return prop.parser(node)
if expected_type is LangString or (isinstance(expected_type, type) and issubclass(expected_type, LangString)):
if isinstance(node, Literal):
return LangString(value=str(node), lang=node.language)
return LangString(value=str(node))
if _is_rdf_model(expected_type):
raise TypeError("Nested RDF models should be handled separately.")
if expected_type is URIRef:
if isinstance(node, URIRef):
return node
return URIRef(str(node))
if isinstance(node, Literal):
value = node.toPython()
else:
value = str(node)
if expected_type is Any or expected_type is None:
return value
if expected_type is str:
return str(value)
if expected_type in {int, float, bool}:
try:
return expected_type(value)
except (TypeError, ValueError):
return value
if expected_type is datetime:
if isinstance(value, datetime):
return value
try:
return datetime.fromisoformat(str(value))
except ValueError:
return value
if expected_type is date:
if isinstance(value, date):
return value
try:
return date.fromisoformat(str(value))
except ValueError:
return value
if expected_type is time:
if isinstance(value, time):
return value
try:
return time.fromisoformat(str(value))
except ValueError:
return value
if expected_type is Decimal:
try:
return Decimal(value)
except (ValueError, TypeError, ArithmeticError):
return value
if expected_type is bytes:
if isinstance(value, bytes):
return value
# rdflib handles base64 decoding for XSD.base64Binary
return value
if expected_type is uuid.UUID:
if isinstance(value, uuid.UUID):
return value
try:
return uuid.UUID(str(value))
except (ValueError, TypeError):
return value
if isinstance(expected_type, type) and issubclass(expected_type, Enum):
return expected_type(value)
return value
def _python_datatype(value: Any) -> URIRef | None:
"""Infer XSD datatype URI from a Python value.
Parameters
----------
value : Any
A Python value to determine the datatype for.
Returns
-------
URIRef | None
The XSD datatype URI, or None if no mapping exists.
"""
if isinstance(value, bool):
return XSD.boolean
if isinstance(value, int):
return XSD.integer
if isinstance(value, float):
return XSD.double
if isinstance(value, datetime):
return XSD.dateTime
if isinstance(value, date):
return XSD.date
if isinstance(value, time):
return XSD.time
if isinstance(value, Decimal):
return XSD.decimal
if isinstance(value, bytes):
return XSD.base64Binary
if isinstance(value, uuid.UUID):
return XSD.string
return None
def _ensure_uri(value: str | URIRef | Namespace | None) -> URIRef | None:
"""Convert various types to a URIRef.
Parameters
----------
value : str | URIRef | Namespace | None
A value that might represent a URI.
Returns
-------
URIRef | None
The URIRef representation, or None if the value is None.
"""
if value is None:
return None
if isinstance(value, URIRef):
return value
if isinstance(value, Namespace):
return URIRef(str(value))
return URIRef(str(value))
URI_PATTERN = re.compile(r"^[a-zA-Z][a-zA-Z0-9+.-]*:")
def _looks_like_uri(value: str) -> bool:
"""Check if a string looks like a URI using a URI scheme pattern.
Parameters
----------
value : str
A string to check.
Returns
-------
bool
True if the string starts with a URI scheme (e.g., 'http:', 'urn:').
"""
return bool(URI_PATTERN.match(value))
def _normalise_base(base_uri: str) -> str:
"""Normalize a base URI to ensure it ends with '/' or '#'.
Parameters
----------
base_uri : str
A base URI string.
Returns
-------
str
The normalized base URI.
"""
if base_uri.endswith(("/", "#")):
return base_uri
return base_uri + "/"
def _unique(values: Iterable[Any]) -> list[Any]:
"""Return unique items from an iterable, preserving order.
Parameters
----------
values : Iterable[Any]
An iterable of items.
Returns
-------
list[Any]
A list with duplicates removed, in original order.
"""
seen = set()
result = []
for value in values:
if value not in seen:
seen.add(value)
result.append(value)
return result
def _default_prefixes() -> dict[str, str]:
    """Build the prefix map that every serialized graph starts with.

    Returns
    -------
    dict[str, str]
        A fresh dictionary mapping ``"rdf"`` and ``"xsd"`` to their
        namespace URI strings.
    """
    return dict(rdf=str(RDF), xsd=str(XSD))
def _is_localized_str_field(field: Any) -> bool:
"""Check whether a field's annotation resolves to ``LocalizedStr``.
``LocalizedStr`` is ``Annotated[list[LangString], BeforeValidator(...)]``.
After Pydantic unwrapping we look for ``list[LangString]`` anywhere in
the annotation tree (including ``Union[..., None]`` wrappers).
"""
annotation = getattr(field, "annotation", None)
if annotation is None:
return False
return _annotation_contains_lang_list(annotation)
def _annotation_contains_lang_list(annotation: Any) -> bool:
    """Return *True* if *annotation* is or contains ``LangStringList`` or ``list[LangString]``.

    ``Annotated`` wrappers are removed first. Unions are searched branch by
    branch, ignoring ``None``.
    """
    bare = _unwrap_annotation(annotation)
    if bare is LangStringList:
        # Concrete class, not a generic alias.
        return True

    origin = get_origin(bare)
    if origin is list:
        element_types = get_args(bare)
        return bool(element_types) and (
            element_types[0] is LangString or _unwrap_annotation(element_types[0]) is LangString
        )

    if origin is Union or origin is types.UnionType:
        return any(
            _annotation_contains_lang_list(branch)
            for branch in get_args(bare)
            if branch is not type(None)
        )

    return False
def _is_rdf_model(value: Any) -> bool:
"""Check if a value is an RdfBaseModel subclass.
Parameters
----------
value : Any
A value to check (typically a type).
Returns
-------
bool
True if value is a class and a subclass of RdfBaseModel.
"""
return isinstance(value, type) and issubclass(value, RdfBaseModel)
def _get_rdf_model_type(type_hint: Any) -> type[RdfBaseModel] | None:
    """Extract the RdfBaseModel subclass named by a type hint.

    The hint may be the model class itself or a union that has a model
    class among its members; in a union the first such member wins.

    Parameters
    ----------
    type_hint : Any
        The type hint to check.

    Returns
    -------
    type[RdfBaseModel] | None
        The RdfBaseModel subclass if found, otherwise None.
    """
    if _is_rdf_model(type_hint):
        return type_hint  # type: ignore[no-any-return]

    origin = get_origin(type_hint)
    pep604_union = getattr(types, "UnionType", None)
    is_union = origin is Union or (pep604_union is not None and origin is pep604_union)
    if not is_union:
        return None

    return next(  # type: ignore[no-any-return]
        (member for member in get_args(type_hint) if _is_rdf_model(member)),
        None,
    )
# Names exported by ``from <module> import *``.
__all__ = [
    "RdfBaseModel",
    "RdfProperty",
    "LangString",
    "LangStringList",
    "LocalizedStr",
]

# Set the default identifier field name on the base class explicitly so it
# is preserved when a lightweight pydantic substitute is in use.
RdfBaseModel.rdf_id_field = "id"