Source code for dartfx.mtnards.dcat

"""
DCAT support for MTNA RDS

"""

from collections.abc import Iterable

from rdflib import Graph

from dartfx.dcat import dcat

from .catalog import MtnaRdsCatalog
from .data_product import MtnaRdsDataProduct
from .server import MtnaRdsServer


[docs] class MtnaRdsDcat: """ DCAT exporter for MTNA RDS servers, catalogs, and data products. """ server: MtnaRdsServer catalogs: set[MtnaRdsCatalog] datasets: set[MtnaRdsDataProduct] uri_style: str | None
[docs] def __init__( self, server: MtnaRdsServer, datasets: set[MtnaRdsDataProduct | str] | None = None, catalog: MtnaRdsCatalog | None = None, data_product: MtnaRdsDataProduct | None = None, uri_style: str | None = None, ): """ Initializes the DCAT exporter. Args: server: The MTNA RDS server to export from. datasets: A set of data products to include. catalog: Optional single catalog to include. data_product: Optional single data product to include. uri_style: Optional URI generation style (currently unused by this exporter). """ self.server = server self.uri_style = uri_style self.catalogs = set() self.datasets = set() if datasets: self.add_datasets(datasets) if catalog: self.add_catalog(catalog) if data_product: self.add_dataset(data_product)
[docs] def graph(self) -> Graph: """Alias for :meth:`get_graph` for backward compatibility with documentation.""" return self.get_graph()
[docs] def get_prefixes_ttl(self, dataset: MtnaRdsDataProduct) -> str: prefixes = f"@prefix catalog: <{dataset._catalog._server.host}/>." prefixes += """ @prefix hvdn: <https://rdf.highvaluedata.net/dcat#> . @prefix dcat: <http://www.w3.org/ns/dcat#> . @prefix dcterms: <http://purl.org/dc/terms/> . @prefix foaf: <http://xmlns.com/foaf/0.1/> . @prefix xsd: <http://www.w3.org/2001/XMLSchema#> . @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . """ return prefixes
[docs] def add_catalog(self, catalog: MtnaRdsCatalog | str) -> None: """Adds a single catalog to the generator""" self.add_catalogs([catalog])
[docs] def add_catalogs(self, catalogs: Iterable[MtnaRdsCatalog | str]) -> None: """Adds catalogs to the generator""" for item in catalogs: if isinstance(item, MtnaRdsCatalog): self.catalogs.add(item) elif isinstance(item, str): raise NotImplementedError else: raise ValueError(f"Unexpected catalog type: {type(item)}")
[docs] def add_dataset(self, dataset: MtnaRdsDataProduct | str) -> None: """Adds a single dataset to the generator""" self.add_datasets([dataset])
[docs] def add_datasets(self, datasets: Iterable[MtnaRdsDataProduct | str]) -> None: """Adds datasets to the generator""" for item in datasets: if isinstance(item, MtnaRdsDataProduct): self.datasets.add(item) elif isinstance(item, str): raise NotImplementedError() else: raise ValueError(f"Unexpected dataset type: {type(item)}")
def _create_dcat_catalog(self, rds_catalog: MtnaRdsCatalog, stub_only: bool = False) -> dcat.Catalog: """Internal helper to generates a DCAT graph for a catalog""" dcat_catalog = dcat.Catalog() dcat_catalog.set_uri(rds_catalog.uri) dcat_catalog.add_title(rds_catalog.name) if rds_catalog.description: dcat_catalog.add_description(rds_catalog.description) dcat_catalog.add_publisher(f"{rds_catalog._server.host}") if not stub_only and rds_catalog.data_products: for rds_data_product in rds_catalog.data_products: dcat_dataset = self._create_dcat_dataset(rds_data_product) dcat_catalog.add_dataset(dcat_dataset) dcat_api = self._create_dcat_api_service(rds_data_product, dcat_dataset) dcat_catalog.add_service(dcat_api) # add service to catalog return dcat_catalog def _create_dcat_dataset( self, rds_data_product: MtnaRdsDataProduct, ) -> dcat.Dataset: dcat_dataset = dcat.Dataset() dcat_dataset.set_uri(rds_data_product.uri) dcat_dataset.add_title(rds_data_product.name) if rds_data_product.description: dcat_dataset.add_description(rds_data_product.description) explorer_url = f"{self.server.host}/explorer/explore/{rds_data_product._catalog.id}/{rds_data_product.id}" dcat_dataset.add_landing_page(explorer_url) dcat_dataset.add_modified_date(rds_data_product.last_update) dcat_dataset.add_publisher(f"{rds_data_product._catalog._server.host}") return dcat_dataset def _create_dcat_api_service( self, rds_data_product: MtnaRdsDataProduct, dcat_dataset: dcat.Dataset, ) -> dcat.DataService: dcat_api = dcat.DataService() dcat_api.set_uri(f"{rds_data_product.uri}-api") dcat_api.add_served_dataset(dcat_dataset) dcat_api.add_conforms_to(rds_data_product._catalog._server.base_url + "/swagger") dcat_api.add_endpoint_url(rds_data_product._catalog._server.api_url) dcat_api.add_type("https://highvaluedata.net/vocab/service_type#MtnaRdsOpenAPI") return dcat_api
[docs] def get_graph(self) -> Graph: """Generates RDF graph for server, catalogs, and datasets. The workflow is as follows: - Generate DCAT graph - Initialize the server catalog - Initialize the catalogs - Initialize the datasets add to graph - Add datasets to graph - Add server catalog to graph """ g = Graph() # # Server Catalog # dcat_server_catalog = dcat.Catalog() dcat_server_catalog.set_uri(f"{self.server.host}") dcat_server_catalog.add_publisher(f"{self.server.host}") # Loop over catalogs dcat_catalogs = {} # keeps track of catalogs being added for rds_catalog in self.catalogs: dcat_catalog = self._create_dcat_catalog(rds_catalog) dcat_server_catalog.add_catalog(dcat_catalog) # add to server level catalog dcat_catalogs[rds_catalog.uri] = dcat_catalog # register # Loop over datasets for rds_data_product in self.datasets: # add data product's catalog if needed rds_catalog = rds_data_product._catalog dcat_catalog = dcat_catalogs.get(rds_catalog.uri) if dcat_catalog is None: dcat_catalog = self._create_dcat_catalog(rds_catalog, stub_only=True) dcat_server_catalog.add_catalog(dcat_catalog) # add to server level catalog dcat_catalogs[rds_catalog.uri] = dcat_catalog # register # create DCAT dataset dcat_dataset = self._create_dcat_dataset(rds_data_product) dcat_catalog.add_dataset(dcat_dataset) # add dataset to catalog # create DCAT API service dcat_api = self._create_dcat_api_service(rds_data_product, dcat_dataset) dcat_catalog.add_service(dcat_api) # add service to catalog # add dataset resources to graph dcat_dataset.add_to_rdf_graph(g) # add API service to graph dcat_api.add_to_rdf_graph(g) # add catalogs to graph for dcat_catalog in dcat_catalogs.values(): dcat_catalog.add_to_rdf_graph(g) # add server catalog to graph dcat_server_catalog.add_to_rdf_graph(g) return g