Source code for dartfx.ddi.ddicodebook

"""Classes to read and process a DDI-Codebook XML document.

This package is at this time not intended to be used for validation
or quality assurance purpose, just as a quick and easy way to load and
process existing DDI-C documents in Python.

It is also not designed to create DDI from scratch.

Author:
     Pascal Heus (pascal.heus@postman.com)

Contributors:
      <be_the_first!>

Version: 0.5.0

How to use::
     from dartfx.ddi import codebook
     my_codebook = codebook.loadxml(filename)

Implementation notes:
     - Based on the version 2.5 of the schema
     - The name of the classes match the complex types defined in DDI-C
     - The name of the classes properties must match the DDI-C element names
     - Type annotations are used to determine the type of the DDI properties
     - The bulk of the work is done in the baseElementType class, from which all other classes inherit
     - An 'options' parameter is passed to all class constructors, but is for future use

Roadmap:
     - Extensive testing
     - Add element specific helper methods to facilitate processing

Pending DDI 2.5 issues/bugs:
     - dataCollType/sources is not repeatable which seems to be a bug
     - dataFingerprintType (used in filedscr) does not derive from baseElementType and uses xs:string instead of stringType
     - codeListSchemeURN in controlledVocabUsedType has no type (should be stringType)
     - usageType does not derive from baseElementType, and neither do the underlying elements.

References:
     - https://docs.python.org/3/howto/annotations.html


"""

from __future__ import annotations

import inspect
import logging
import os
import re
import xml.etree.ElementTree as ET
from typing import Any

from pydantic import BaseModel, ConfigDict, Field


def get_xml_base_name(tag):
    """Return the local name of an XML tag, without its ``{namespace}`` prefix.

    Everything up to and including the first ``}`` is dropped; a tag that
    contains no ``}`` is returned unchanged.
    """
    _, closing_brace, local_name = tag.partition("}")
    return local_name if closing_brace else tag
def loadxml(file) -> codeBookType:
    """Parse the XML file *file* and return it as a populated ``codeBookType``."""
    document_root = ET.parse(file).getroot()
    codebook = codeBookType()  # type: ignore
    codebook.from_xml_element(document_root)
    return codebook
def loadxmlstring(xmlstring) -> codeBookType:
    """Parse the XML text *xmlstring* and return it as a populated ``codeBookType``."""
    document_root = ET.fromstring(xmlstring)
    codebook = codeBookType()  # type: ignore
    codebook.from_xml_element(document_root)
    return codebook
def get_mixed_content(element) -> str:
    """Return the mixed content of an XML element as one concatenated string.

    This avoids having to implement/parse the various text formatting options
    supported by DDI-C such as XHTML or forms: nested elements are rendered
    recursively as ``<tag>...</tag>`` around their own mixed content.

    Notes:
        - Leading/trailing whitespace of every text and tail segment is
          stripped before concatenation.
        - Attributes of nested elements are not rendered.
        - Nested tags are written with their local name only. ElementTree
          reports namespaced tags as ``{uri}local``; emitting that verbatim
          would produce ``<{uri}local>`` in the output.

    Args:
        element: the ``xml.etree.ElementTree.Element`` to flatten.

    Returns:
        The concatenated content ("" when the element has no text or children).
    """
    parts = []
    if element.text:
        parts.append(element.text.strip())
    for child in element:
        tag = child.tag
        if isinstance(tag, str):
            # drop the "{namespace}" prefix, keeping only the local name
            tag = tag.split("}", 1)[-1]
        parts.append(f"<{tag}>")
        parts.append(get_mixed_content(child))
        parts.append(f"</{tag}>")
        if child.tail:
            parts.append(child.tail.strip())
    return "".join(parts)
class XmlAttribute:
    """Lightweight holder for an XML attribute: its name, value, datatype and options."""

    def __init__(self, name, value=None, datatype=str, options=None):
        """Store the attribute description as given; nothing is validated or converted."""
        self._options = options
        self.datatype = datatype
        self.value = value
        self.name = name

    def __str__(self):
        """Render the attribute as the string form of its value."""
        rendered = str(self.value)
        return rendered
class baseElementType(BaseModel):
    """The base class all DDI elements are based on.

    All the parsing and processing is done in this base class:

    - XML attributes are stored on the model fields whose name or alias
      matches the attribute name.
    - Child elements are instantiated from the class named in the field's
      type annotation and stored on the field named after the element.
    - The text of a leaf element is stored in ``content``.
    """

    model_config = ConfigDict(extra="allow", populate_by_name=True, arbitrary_types_allowed=True)

    # Common attributes
    id: str | None = Field(None, alias="ID")
    xml_lang: str | None = Field(None, alias="xml:lang")
    elementVersion: str | None = None
    elementVersionDate: str | None = None
    ddiLifecycleUrn: str | None = None
    ddiCodebookUrn: str | None = None
    content: str | None = None

    def __init__(self, options=None, **data):
        """Create the element.

        Args:
            options: kept on the instance as ``_options``; not used by this class.
            **data: initial field values, forwarded to the Pydantic constructor.
        """
        super().__init__(**data)
        self._options = options

    @property
    def _attributes(self) -> dict[str, XmlAttribute]:
        """
        Backward compatibility property to mimic the old _attributes dictionary.
        Only returns attributes that have values.
        """
        attrs = {}
        for field_name, field_info in type(self).model_fields.items():
            value = getattr(self, field_name)
            if value is None or field_name == "content":
                continue
            # Skip list/model fields that are children, only primitive attributes.
            # This is a heuristic - strict mapping would need metadata.
            if isinstance(value, (str, int, bool, float)):
                alias = field_info.alias or field_name
                attrs[alias] = XmlAttribute(alias, value)
        return attrs

    @property
    def attributes(self) -> dict[str, XmlAttribute]:
        """Public accessor for the populated primitive attributes, keyed by XML name."""
        return self._attributes

    @property
    def _content(self) -> str | None:
        """Backward compatible accessor for ``content``."""
        return self.content

    def dump(self, name="codeBook", level=0, max_level=99, indent=3):
        """Dumps the content to the console.

        Useful for debugging/development purposes. Uses ANSI escape code for coloring.
        See https://www.lihaoyi.com/post/BuildyourownCommandLinewithANSIescapecodes.html

        Args:
            name: label printed for this element.
            level: current nesting depth (0 for the root).
            max_level: elements nested deeper than this are not printed.
            indent: number of spaces per nesting level.
        """
        if level > max_level:
            return
        pad = " " * level * indent
        print("\u001b[0m\u001b[34m", end="")
        print(f"{pad}{name} ({self.__class__.__name__})")
        # attributes
        print("\u001b[0m\u001b[32m", end="")
        for attrib, value in self.attributes.items():
            print(f"{' ' * (level * indent + indent)}@{attrib}: {value.value}")
        # content
        if self.content:
            print("\u001b[0m\u001b[30m", end="")
            for line in self.content.splitlines():
                print(f"{pad}{line}")
        # children: they are declared model fields, so walk the fields and
        # recurse into every value (or list item) that can dump itself.
        # Primitive attribute values have no dump() and are skipped here.
        for field_name in type(self).model_fields:
            if field_name == "content":
                continue
            value = getattr(self, field_name)
            children = value if isinstance(value, list) else [value]
            for child in children:
                if hasattr(child, "dump"):
                    child.dump(field_name, level + 1, max_level, indent)
        print("\u001b[0m", end="")

    def from_xml_element(self, element: ET.Element):
        """Initializes the object from an XML element.

        Args:
            element: the parsed XML element to read attributes, children and
                text from.
        """
        cls_name = self.__class__.__name__
        cls_annotations = self.get_annotations()

        # Map every XML name (field name or alias) to its model field. The
        # first field that claims a name wins.
        field_by_xml_name: dict[str, str] = {}
        for field_name, field_info in type(self).model_fields.items():
            field_by_xml_name.setdefault(field_name, field_name)
            if field_info.alias:
                field_by_xml_name.setdefault(field_info.alias, field_name)

        # ElementTree reports xml:* attributes in "{uri}local" form, so they
        # are translated back to the "xml:" prefix used by the field aliases.
        xml_ns = "{http://www.w3.org/XML/1998/namespace}"
        for raw_name, value in element.attrib.items():
            if raw_name.startswith(xml_ns):
                attrib = "xml:" + raw_name[len(xml_ns):]
            else:
                attrib = raw_name
            target_field = field_by_xml_name.get(attrib)
            if target_field is not None:
                setattr(self, target_field, value)
            # attributes that match no field (e.g. schema location) are ignored

        # Add children
        for child in element:
            base_name = get_xml_base_name(child.tag)
            property_annotation = cls_annotations.get(base_name)
            if property_annotation is None:
                # don't know this element
                logging.warning(f"Child element {base_name} ignored on {cls_name}")
                continue
            if not property_annotation["is_ddi_element"]:
                # annotated but does not appear to have an associated class
                logging.warning(f"No DDI class found for element {base_name} in {cls_name}")
                continue
            # create the object instance based on the type/class and parse the child
            instance = globals()[property_annotation["type"]]()
            instance.from_xml_element(child)
            if property_annotation["is_list"]:
                # repeatable element: make sure the list exists, then append
                if getattr(self, base_name, None) is None:
                    setattr(self, base_name, [])
                getattr(self, base_name).append(instance)
            else:
                # non-repeatable element: a later occurrence replaces an earlier one
                setattr(self, base_name, instance)

        # Text content: only captured for leaf elements. Subclasses that hold
        # mixed content override this method.
        if len(element) == 0 and element.text and element.text.strip():
            self.content = element.text.strip()

    def get_annotations(self):
        """Helper function to parse annotated class properties.

        Annotations are collected from this class and from every ancestor
        derived from ``baseElementType``, so inherited child elements are
        recognised too. ``inspect.get_annotations`` alone only reports the
        annotations declared directly on a class.

        Returns:
            A dict keyed by property name. Each value is a dict with keys
            ``name``, ``type`` (the referenced type name as a string),
            ``is_list`` and ``is_ddi_element``.
        """
        raw_annotations = {}
        for klass in reversed(type(self).__mro__):
            if issubclass(klass, baseElementType):
                raw_annotations.update(inspect.get_annotations(klass))

        annotations_info = {}
        for prop_name, annotation in raw_annotations.items():
            # Ignore internal pydantic fields/methods
            if prop_name.startswith("_") or prop_name in ["model_config", "model_fields"]:
                continue
            annotation_str = str(annotation)
            # Handle Union types like 'Type | None' or 'Optional[Type]'
            annotation_str = re.sub(r"\s*\|\s*None", "", annotation_str)
            annotation_str = re.sub(r"Optional\[(.*?)\]", r"\1", annotation_str)
            if "Union[" in annotation_str:
                annotation_str = annotation_str.replace("Union[", "").rstrip("]").split(",")[0].strip()
            # detect if this is a List (repeatable property)
            is_list = "List[" in annotation_str or "list[" in annotation_str
            # extract inner type: this determines the target class
            if "[" in annotation_str:
                inner = annotation_str.split("[", 1)[1].rsplit("]", 1)[0]
                # handle forward refs string
                if "ForwardRef" in inner:
                    match = re.search(r"ForwardRef\(\'(.*?)\'\)", inner)
                    property_type = match.group(1) if match else inner
                elif "'" in inner:
                    property_type = inner.replace("'", "").replace('"', "")
                else:
                    property_type = inner
            else:
                property_type = annotation_str.replace("'", "").replace('"', "")
            # cleanup type name
            if "codebook." in property_type:
                property_type = property_type.split("codebook.")[1]
            # a DDI element is any type name resolving to a BaseModel subclass
            # in this module (string lookup)
            target = globals().get(property_type)
            is_ddi_element = inspect.isclass(target) and issubclass(target, BaseModel)
            annotations_info[prop_name] = {
                "name": prop_name,
                "type": property_type,
                "is_list": is_list,
                "is_ddi_element": is_ddi_element,
            }
        return annotations_info
# # THIS SECTION CONTAINS THE REUSABLE TEXT TYPES # BASED ON abstractTextType #
class abstractTextType(baseElementType):
    """Base for elements whose body is kept as one mixed-content string."""

    def from_xml_element(self, element: ET.Element):
        """Load the element, then replace ``content`` with its flattened mixed content.

        The base implementation runs first so the attributes are processed.
        The text is then captured with ``get_mixed_content`` instead of being
        interpreted any further.
        """
        super().from_xml_element(element)
        mixed_text = get_mixed_content(element)
        self.content = mixed_text
class dateType(abstractTextType):
    """Model for the DDI-C ``dateType`` complex type; adds nothing to ``abstractTextType``."""

    pass
class stringType(abstractTextType):
    """Model for the DDI-C ``stringType`` complex type."""

    # XML attribute
    varRef: str | None = None
class simpleTextType(abstractTextType):
    """Model for the DDI-C ``simpleTextType`` complex type; adds nothing to ``abstractTextType``."""

    pass
class simpleTextAndDateType(simpleTextType):
    """Model for the DDI-C ``simpleTextAndDateType`` complex type."""

    # XML attribute
    date: str | None = None
class phraseType(simpleTextType):
    """Model for the DDI-C ``phraseType`` complex type."""

    # XML attribute
    varRef: str | None = None
class tableType(baseElementType):
    """Model for the DDI-C ``tableType`` complex type (only its attributes are declared)."""

    # XML attributes
    frame: str | None = None
    colsep: str | None = None
    rowsep: str | None = None
    pgwide: str | None = None
class tableAndTextType(abstractTextType):
    """Model for the DDI-C ``tableAndTextType`` complex type."""

    # child element
    table: tableType | None = None
class txtType(tableAndTextType):
    """Model for the DDI-C ``txtType`` complex type."""

    # XML attributes
    level: str | None = None
    sdatrefs: str | None = None
class conceptType(simpleTextType):
    """Model for the DDI-C ``conceptType`` complex type."""

    # XML attributes
    vocab: str | None = None
    # The XML attribute is spelled ``vocabURI`` (as on ``keywordType``). The
    # alias lets the parser match it while the Python name stays ``vocabUri``
    # for existing callers.
    vocabUri: str | None = Field(None, alias="vocabURI")
class conceptualTextType(abstractTextType):
    """Model for the DDI-C ``conceptualTextType`` complex type."""

    # child elements
    concept: conceptType | None = None
    txt: txtType | None = None
# # THIS SECTION CONTAINS ALL THE DDI ELEMENT TYPES #
class abstractType(simpleTextAndDateType):
    """Model for the DDI-C ``abstractType`` complex type."""

    # XML attribute
    contentType: str | None = None
class accsPlacType(simpleTextType):
    """Model for the DDI-C ``accsPlacType`` complex type."""

    # XML attribute
    URI: str | None = None
class anlyInfoType(baseElementType):
    """Model for the DDI-C ``anlyInfoType`` complex type."""

    # child elements (repeatable)
    respRate: list[simpleTextType] = Field(default_factory=list)
    EstSmpErr: list[simpleTextType] = Field(default_factory=list)
    dataAppr: list[dataApprType] = Field(default_factory=list)
class anlyUnitType(conceptualTextType):
    """Model for the DDI-C ``anlyUnitType`` complex type."""

    # XML attribute
    unit: str | None = None
class attributeType(stringType):
    """Model for the DDI-C ``attributeType``; adds nothing to ``stringType``."""

    # note: this is a xs:string in the schema (on usageType)
    pass
class AuthEntyType(simpleTextType):
    """Model for the DDI-C ``AuthEntyType`` complex type."""

    # XML attribute
    affiliation: str | None = None
class authorizingAgencyType(stringType):
    """Model for the DDI-C ``authorizingAgencyType`` complex type."""

    # XML attributes
    affiliation: str | None = None
    abbr: str | None = None
class backwardType(simpleTextType):
    """Model for the DDI-C ``backwardType`` complex type."""

    # XML attribute
    qstn: str | None = None
class biblCitType(simpleTextType):
    """Model for the DDI-C ``biblCitType`` complex type."""

    # XML attribute
    format: str | None = None
class boundPolyType(baseElementType):
    """Model for the DDI-C ``boundPolyType`` complex type."""

    # child element (repeatable)
    polygon: list[polygonType] = Field(default_factory=list)
class catgryGrpType(baseElementType):
    """Model for the DDI-C ``catgryGrpType`` complex type."""

    # child elements (repeatable)
    labl: list[lablType] = Field(default_factory=list)
    catStat: list[catStatType] = Field(default_factory=list)
    txt: list[txtType] = Field(default_factory=list)
    # XML attributes
    missing: str | None = None
    missType: str | None = None
    catgry: str | None = None
    catGrp: str | None = None
    levelno: str | None = None
    levelnm: str | None = None
    compl: str | None = None
    excls: str | None = None
class catgryType(baseElementType):
    """Model for the DDI-C ``catgryType`` complex type."""

    # child elements
    catValu: simpleTextType | None = None  # Optional because it can be missing
    labl: list[lablType] = Field(default_factory=list)
    txt: list[txtType] = Field(default_factory=list)
    catStat: list[catStatType] = Field(default_factory=list)
    mrow: mrowType | None = None
    # XML attributes
    missing: str | None = None
    missType: str | None = None
    country: str | None = None
    sdatrefs: str | None = None
    excls: str | None = None
    catgry: str | None = None
    level: str | None = None

    @property
    def is_missing(self):
        """True when the ``missing`` attribute is exactly ``"Y"``."""
        return str(self.missing) == "Y"
class catLevelType(baseElementType):
    """Model for the DDI-C ``catLevelType`` complex type."""

    # XML attributes
    levelnm: str | None = None
    geoMap: str | None = None
class catStatType(simpleTextType):
    """Model for the DDI-C ``catStatType`` complex type."""

    # XML attributes
    type: str | None = None
    otherType: str | None = None
    URI: str | None = None
    methrefs: str | None = None
    wgtd: str | None = None
    # "wgt-var" is not a valid Python identifier, hence the alias
    wgt_var: str | None = Field(None, alias="wgt-var")
    weight: str | None = None
    sdatrefs: str | None = None
class citationType(baseElementType):
    """Model for the DDI-C ``citationType`` complex type."""

    # child elements (single)
    titlStmt: titlStmtType | None = None
    rspStmt: rspStmtType | None = None
    prodStmt: prodStmtType | None = None
    distStmt: distStmtType | None = None
    # child elements (repeatable)
    serStmt: list[serStmtType] = Field(default_factory=list)
    verStmt: list[verStmtType] = Field(default_factory=list)
    biblCit: list[biblCitType] = Field(default_factory=list)
    holdings: list[holdingsType] = Field(default_factory=list)
    notes: list[notesType] = Field(default_factory=list)
    # XML attribute
    MARCURI: str | None = None
class confDecType(simpleTextType):
    """Model for the DDI-C ``confDecType`` complex type."""

    # XML attributes
    required: str | None = None
    formNo: str | None = None
    URI: str | None = None
class cleanOpsType(simpleTextType):
    """Model for the DDI-C ``cleanOpsType`` complex type."""

    # XML attribute
    agency: str | None = None
class ConOpsType(simpleTextType):
    """Model for the DDI-C ``ConOpsType`` complex type."""

    # XML attribute
    agency: str | None = None
class contactType(simpleTextType):
    """Model for the DDI-C ``contactType`` complex type."""

    # XML attributes
    affiliation: str | None = None
    URI: str | None = None
    email: str | None = None
class codeBookType(baseElementType):
    """Model for the DDI-C ``codeBookType`` complex type (the document root).

    Besides the declared children and attributes, this class offers helper
    methods that extract commonly needed information from the loaded codebook.
    """

    # child elements (repeatable)
    docDscr: list[docDscrType] = Field(default_factory=list)
    stdyDscr: list[stdyDscrType] = Field(default_factory=list)
    fileDscr: list[fileDscrType] = Field(default_factory=list)
    dataDscr: list[dataDscrType] = Field(default_factory=list)
    otherMat: list[otherMatType] = Field(default_factory=list)
    # XML attributes
    version: str | None = None
    codeBookAgency: str | None = None

    # HELPERS

    def _study_titl_stmt(self):
        """Return the title statement of the first citation of the first study description.

        Returns None when any link of that chain is absent or empty.
        """
        if not self.stdyDscr:
            return None
        stdyDscr = self.stdyDscr[0]
        if not stdyDscr.citation:
            return None
        return getattr(stdyDscr.citation[0], "titlStmt", None) or None

    def get_abstract(self) -> str | None:
        """Returns the abstract from the study description if it exists, else None."""
        if not self.stdyDscr:
            return None
        stdyDscr = self.stdyDscr[0]
        if not stdyDscr.stdyInfo:
            return None
        stdyInfo = stdyDscr.stdyInfo[0]
        if not stdyInfo.abstract:
            return None
        return str(stdyInfo.abstract[0].content)

    def get_alternate_title(self) -> str | None:
        """Returns the alternate title from the study description if it exists, else None."""
        titlStmt = self._study_titl_stmt()
        if titlStmt is None or not titlStmt.altTitl:
            return None
        return str(titlStmt.altTitl[0].content)

    def get_data_dictionary(
        self,
        file_id: str | None = None,
        name_regex: str | None = None,
        label_regex: str | None = None,
        categories: bool = False,
        questions: bool = False,
    ) -> dict[str, dict]:
        """Generates an all-in-one data dictionary from the variable descriptions.

        Supports various filtering and rendering options.

        Args:
            file_id: filter to a specific file identifier (`var/@files` attribute
                matching `fileDscr/@ID`). A string-valued `files` attribute is
                split on whitespace and compared identifier by identifier.
            name_regex: a regular expression to match variable names
                (case-insensitive, anchored at the start as with `re.match`)
            label_regex: a regular expression to match variable labels
                (case-insensitive, anchored at the start as with `re.match`)
            categories: whether to include categories in the data dictionary
            questions: whether to include questions in the data dictionary

        Returns:
            A dict keyed by variable ID. Each value holds `id`, `n_categories`
            and `has_question`, plus `name`, `label`, `categories` and
            `question` when available/requested.
        """
        # compile the filters once instead of once per variable
        name_pattern = re.compile(name_regex, re.IGNORECASE) if name_regex else None
        label_pattern = re.compile(label_regex, re.IGNORECASE) if label_regex else None
        # question child element -> key in the rendered question info
        question_parts = (
            ("preQTxt", "pre"),
            ("qstnLit", "literal"),
            ("postQTxt", "post"),
            ("forward", "forward"),
            ("backward", "backward"),
            ("ivuInstr", "instructions"),
        )

        value: dict[str, dict] = {}
        for dataDscr in self.dataDscr:
            for var in dataDscr.var:
                # file filter
                if file_id:
                    files = var.files
                    if isinstance(files, str):
                        # match whole identifiers: a plain substring test
                        # would let "F1" select variables of file "F10"
                        files = files.split()
                    if not files or file_id not in files:
                        continue
                var_info: dict[str, Any] = {"id": var.id}
                # name
                if var.name:
                    if name_pattern and not name_pattern.match(str(var.name)):
                        continue
                    var_info["name"] = var.name
                elif name_regex:
                    continue
                # label
                if var.labl:
                    var_label = var.labl[0].content
                    if label_pattern and not label_pattern.match(str(var_label)):
                        continue
                    var_info["label"] = var_label
                elif label_regex:
                    continue
                # categories
                if var.catgry:
                    var_info["n_categories"] = len(var.catgry)
                    if categories:
                        cats = []
                        for catgry in var.catgry:
                            cat = {}
                            if catgry.catValu:
                                cat["value"] = catgry.catValu.content
                            if catgry.labl:
                                cat["label"] = catgry.labl[0].content
                            if cat:
                                cats.append(cat)
                        var_info["categories"] = cats
                else:
                    var_info["n_categories"] = 0
                # question
                var_info["has_question"] = bool(var.qstn)
                if var_info["has_question"] and questions:
                    var_qstn = var.qstn[0]
                    qstn_info = {}
                    for element_name, key in question_parts:
                        part = getattr(var_qstn, element_name)
                        if part:
                            qstn_info[key] = part.content
                    var_info["question"] = qstn_info
                # add to dictionary
                value[var.id] = var_info
        return value

    def get_files(self) -> dict[str, dict]:
        """Returns the files and their documented information.

        Returns:
            A dict keyed by `fileDscr/@ID`. Each value holds `id` and, when
            documented, `name`, `basename` (name without extension), `content`,
            `n_records` and `n_variables`.
        """
        value = {}
        for fileDscr in self.fileDscr:
            file: dict[str, Any] = {"id": fileDscr.id}
            if fileDscr.fileTxt:
                # only the first fileTxt is considered
                fileTxt = fileDscr.fileTxt[0]
                if fileTxt.fileName:
                    file["name"] = str(fileTxt.fileName[0].content)
                    file["basename"] = os.path.splitext(file["name"])[0]
                fileCont = getattr(fileTxt, "fileCont", None)
                if fileCont:
                    file["content"] = str(fileCont.content)
                dimensns = getattr(fileTxt, "dimensns", None)
                if dimensns:
                    if dimensns.caseQnty:
                        file["n_records"] = dimensns.caseQnty[0].content
                    if dimensns.varQnty:
                        file["n_variables"] = dimensns.varQnty[0].content
            value[file["id"]] = file
        return value

    def get_title(self) -> str | None:
        """Returns the title of the study, or None when it is not documented."""
        titlStmt = self._study_titl_stmt()
        titl = getattr(titlStmt, "titl", None)
        if not titl:
            return None
        return str(titl.content)

    def get_subtitle(self) -> str | None:
        """Returns the subtitle of the study, or None when it is not documented."""
        titlStmt = self._study_titl_stmt()
        if titlStmt is None or not titlStmt.subTitl:
            return None
        return str(titlStmt.subTitl[0].content)

    def search_variables(
        self,
        _file_id: str | None = None,
        _name: str | None = None,
        _label: str | None = None,
        _has_catgry: bool | None = None,
        _has_qstn: bool | None = None,
    ):
        """
        Search variables in the codebook

        NOTE: the filter parameters are accepted but not applied yet; every
        variable of every data description is returned.
        """
        return [var for dataDscr in self.dataDscr for var in dataDscr.var]
class codingInstructionsType(baseElementType):
    """Model for the DDI-C ``codingInstructionsType`` complex type."""

    # child elements (repeatable)
    txt: list[txtType] = Field(default_factory=list)
    command: list[commandType] = Field(default_factory=list)
    # XML attributes
    type: str | None = None
    relatedProcesses: str | None = None
class cohortType(baseElementType):
    """Model for the DDI-C ``cohortType`` complex type."""

    # child element (repeatable)
    range: list[rangeType] = Field(default_factory=list)
    # XML attributes
    catRef: str | None = None
    value: str | None = None
class collDateType(simpleTextAndDateType):
    """Model for the DDI-C ``collDateType`` complex type."""

    # XML attributes
    event: str | None = None
    cycle: str | None = None
class collectorTrainingType(simpleTextType):
    """Model for the DDI-C ``collectorTrainingType`` complex type."""

    # XML attribute
    type: str | None = None
class commandType(stringType):
    """Model for the DDI-C ``commandType`` complex type."""

    # XML attribute
    formalLanguage: str | None = None
class controlledVocabUsedType(baseElementType):
    """Model for the DDI-C ``controlledVocabUsedType`` complex type."""

    # child elements (single)
    codeListID: stringType | None = None
    codeListName: stringType | None = None
    codeListAgencyName: stringType | None = None
    codeListVersionID: stringType | None = None
    codeListURN: stringType | None = None
    codeListSchemeURN: stringType | None = None
    # child element (repeatable)
    usage: list[usageType] = Field(default_factory=list)
class CubeCoordType(baseElementType):
    """Model for the DDI-C ``CubeCoordType`` complex type."""

    # XML attributes
    coordNo: str | None = None
    coordVal: str | None = None
    coordValRef: str | None = None
class custodianType(stringType):
    """Model for the DDI-C ``custodianType`` complex type."""

    # XML attributes
    affiliation: str | None = None
    abbr: str | None = None
class dataAccsType(baseElementType):
    """Model for the DDI-C ``dataAccsType`` complex type."""

    # child elements (repeatable)
    setAvail: list[setAvailType] = Field(default_factory=list)
    useStmt: list[useStmtType] = Field(default_factory=list)
    notes: list[notesType] = Field(default_factory=list)
class dataApprType(simpleTextType):
    """Model for the DDI-C ``dataApprType`` complex type."""

    # XML attribute
    type: str | None = None
class dataCollectorType(conceptualTextType):
    """Model for the DDI-C ``dataCollectorType`` complex type."""

    # XML attributes
    abbr: str | None = None
    affiliation: str | None = None
    role: str | None = None
class dataDscrType(baseElementType):
    """Model for the DDI-C ``dataDscrType`` complex type."""

    # child elements (repeatable)
    varGrp: list[varGrpType] = Field(default_factory=list)
    nCubeGrp: list[nCubeGrpType] = Field(default_factory=list)
    var: list[varType] = Field(default_factory=list)
    nCube: list[nCubeType] = Field(default_factory=list)
    notes: list[notesType] = Field(default_factory=list)
class dataKindType(conceptualTextType):
    """Model for the DDI-C ``dataKindType`` complex type."""

    # XML attribute
    type: str | None = None
class dataProcessingType(simpleTextType):
    """Model for the DDI-C ``dataProcessingType`` complex type."""

    # XML attribute
    type: str | None = None
class depositrType(simpleTextType):
    """Model for the DDI-C ``depositrType`` complex type."""

    # XML attributes
    abbr: str | None = None
    affiliation: str | None = None
class distrbtrType(simpleTextType):
    """Model for the DDI-C ``distrbtrType`` complex type."""

    # XML attributes
    abbr: str | None = None
    affiliation: str | None = None
    URI: str | None = None
class distStmtType(baseElementType):
    """Model for the DDI-C ``distStmtType`` complex type."""

    # child elements (repeatable)
    distrbtr: list[distrbtrType] = Field(default_factory=list)
    contact: list[contactType] = Field(default_factory=list)
    depositr: list[depositrType] = Field(default_factory=list)
    depDate: list[simpleTextAndDateType] = Field(default_factory=list)
    distDate: list[simpleTextAndDateType] = Field(default_factory=list)
class dataCollType(baseElementType):
    """Model for the DDI-C ``dataCollType`` complex type."""

    # child elements (all declared as repeatable)
    timeMeth: list[timeMethType] = Field(default_factory=list)
    dataCollector: list[dataCollectorType] = Field(default_factory=list)
    collectorTraining: list[collectorTrainingType] = Field(default_factory=list)
    frequenc: list[frequencType] = Field(default_factory=list)
    sampProc: list[conceptualTextType] = Field(default_factory=list)
    sampleFrame: list[sampleFrameType] = Field(default_factory=list)
    targetSampleSize: list[conceptualTextType] = Field(default_factory=list)
    deviat: list[simpleTextType] = Field(default_factory=list)
    collMode: list[conceptualTextType] = Field(default_factory=list)
    resInstru: list[resInstruType] = Field(default_factory=list)
    instrumentDevelopment: list[instrumentDevelopmentType] = Field(default_factory=list)
    sources: list[sourcesType] = Field(default_factory=list)
    collSitu: list[simpleTextType] = Field(default_factory=list)
    actMin: list[simpleTextType] = Field(default_factory=list)
    ConOps: list[ConOpsType] = Field(default_factory=list)
    weight: list[simpleTextType] = Field(default_factory=list)
    cleanOps: list[cleanOpsType] = Field(default_factory=list)
class dataFingerprintType(baseElementType):
    """Model for the DDI-C ``dataFingerprintType`` complex type."""

    # Note that this type does not derive from baseElementType in the schema
    # It also uses xs:string instead of stringType
    # child elements (single)
    digitalFingerprintValue: stringType | None = None
    algorithmSpecification: stringType | None = None
    # NOTE(review): child elements are matched by exact field name; the schema
    # element may be spelled `algorithmVersion` - confirm against the XSD
    algorithmversion: stringType | None = None
class dataItemType(baseElementType):
    """Model for the DDI-C ``dataItemType`` complex type."""

    # child elements (repeatable)
    CubeCoord: list[CubeCoordType] = Field(default_factory=list)
    physLoc: list[physLocType] = Field(default_factory=list)
    # XML attributes
    varRef: str | None = None
    nCubeRef: str | None = None
class derivationType(baseElementType):
    """Model for the DDI-C ``derivationType`` complex type."""

    # child elements (repeatable)
    drvdesc: list[simpleTextType] = Field(default_factory=list)
    drvcmd: list[drvcmdType] = Field(default_factory=list)
    # XML attribute
    var: str | None = None
class developmentActivityType(baseElementType):
    """Model for the DDI-C ``developmentActivityType`` complex type."""

    # child elements (repeatable)
    description: list[simpleTextType] = Field(default_factory=list)
    participant: list[participantType] = Field(default_factory=list)
    resource: list[resourceType] = Field(default_factory=list)
    outcome: list[simpleTextType] = Field(default_factory=list)
    # XML attribute
    type: str | None = None
class dimensnsType(baseElementType):
    """Model for the DDI-C ``dimensnsType`` complex type."""

    # child elements (repeatable)
    caseQnty: list[simpleTextType] = Field(default_factory=list)
    varQnty: list[simpleTextType] = Field(default_factory=list)
    logRecL: list[simpleTextType] = Field(default_factory=list)
    recPrCase: list[simpleTextType] = Field(default_factory=list)
    recNumTot: list[simpleTextType] = Field(default_factory=list)
class dmnsType(baseElementType):
    """Model for the DDI-C ``dmnsType`` complex type."""

    # child element (repeatable)
    cohort: list[cohortType] = Field(default_factory=list)
    # XML attributes
    rank: str | None = None
    varRef: str | None = None
class docDscrType(baseElementType):
    """Model for the DDI-C ``docDscrType`` complex type."""

    # child element (single)
    citation: citationType | None = None
    # child elements (repeatable)
    guide: list[simpleTextType] = Field(default_factory=list)
    docStatus: list[simpleTextType] = Field(default_factory=list)
    docSrc: list[docSrcType] = Field(default_factory=list)
    controlledVocabUsed: list[controlledVocabUsedType] = Field(default_factory=list)
    notes: list[notesType] = Field(default_factory=list)
class docSrcType(baseElementType):
    """Model for the DDI-C ``docSrcType`` complex type.

    Declares the same children and attribute as ``citationType``.
    """

    # child elements (single)
    titlStmt: titlStmtType | None = None
    rspStmt: rspStmtType | None = None
    prodStmt: prodStmtType | None = None
    distStmt: distStmtType | None = None
    # child elements (repeatable)
    serStmt: list[serStmtType] = Field(default_factory=list)
    verStmt: list[verStmtType] = Field(default_factory=list)
    biblCit: list[biblCitType] = Field(default_factory=list)
    # Children are matched by exact element name, so <holdings> needs a field
    # spelled "holdings" (as on citationType) to be loaded.
    holdings: list[holdingsType] = Field(default_factory=list)
    # Deprecated misspelling kept so existing attribute access keeps working;
    # no DDI element carries this name, so the parser never fills it.
    holdngs: list[holdingsType] = Field(default_factory=list)
    notes: list[notesType] = Field(default_factory=list)
    # XML attribute
    MARCURI: str | None = None
class drvcmdType(simpleTextType):
    """Model for the DDI-C ``drvcmdType`` complex type."""

    # XML attribute
    syntax: str | None = None
class embargoType(simpleTextAndDateType):
    """Model for the DDI-C ``embargoType`` complex type."""

    # XML attributes
    event: str | None = None
    format: str | None = None
class evaluatorType(stringType):
    """Model for the DDI-C ``evaluatorType`` complex type."""

    # XML attributes
    affiliation: str | None = None
    abbr: str | None = None
    role: str | None = None
class eventDateType(dateType):
    """Model for the DDI-C ``eventDateType`` complex type."""

    # XML attribute
    event: str | None = None
class exPostEvaluationType(baseElementType):
    """Model for the DDI-C ``exPostEvaluationType`` complex type."""

    # child elements (repeatable)
    evaluator: list[evaluatorType] = Field(default_factory=list)
    evaluationProcess: list[simpleTextType] = Field(default_factory=list)
    outcomes: list[simpleTextType] = Field(default_factory=list)
    # XML attributes
    completionDate: str | None = None
    type: str | None = None
class fileDscrType(baseElementType):
    """Model for the DDI-C ``fileDscrType`` complex type."""

    # child elements
    fileTxt: list[fileTxtType] = Field(default_factory=list)
    locMap: locMapType | None = None
    notes: list[notesType] = Field(default_factory=list)
    # XML attributes
    URI: str | None = None
    sdatrefs: str | None = None
    methrefs: str | None = None
    pubrefs: str | None = None
    access: str | None = None
class fileStrcType(baseElementType):
    """Model for the DDI-C ``fileStrcType`` complex type."""

    # child elements (repeatable)
    recGrp: list[recGrpType] = Field(default_factory=list)
    notes: list[notesType] = Field(default_factory=list)
    # XML attributes
    type: str | None = None
    otherType: str | None = None
    fileStrcRef: str | None = None
class fileTxtType(baseElementType):
    """Model for the DDI-C ``fileTxtType`` complex type."""

    # child elements
    fileName: list[simpleTextType] = Field(default_factory=list)
    fileCitation: citationType | None = None
    dataFingerprint: list[dataFingerprintType] = Field(default_factory=list)
    fileCont: simpleTextType | None = None
    # NOTE(review): children are matched by exact field name; given the
    # `fileStrcType` class name the element is presumably `fileStrc` - confirm
    fileStr: fileStrcType | None = None
    dimensns: dimensnsType | None = None
    fileType: list[fileTypeType] = Field(default_factory=list)
    format: list[simpleTextType] = Field(default_factory=list)
    filePlac: list[simpleTextType] = Field(default_factory=list)
    dataChck: list[simpleTextType] = Field(default_factory=list)
    ProcStat: list[simpleTextType] = Field(default_factory=list)
    dataMsng: list[simpleTextType] = Field(default_factory=list)
    software: list[softwareType] = Field(default_factory=list)
    verStmt: list[verStmtType] = Field(default_factory=list)
class fileTypeType(simpleTextType):
    """Model for the DDI-C ``fileTypeType`` complex type."""

    # XML attribute
    charset: str | None = None
class frequencType(simpleTextType):
    """Model for the DDI-C ``frequencType`` complex type."""

    # XML attribute
    freq: str | None = None
class forwardType(simpleTextType):
    """Model for the DDI-C ``forwardType`` complex type."""

    # XML attribute
    qstn: str | None = None
class frameUnitType(baseElementType):
    """Model for the DDI-C ``frameUnitType`` complex type."""

    # child elements
    unitType: unitTypeType | None = None
    txt: list[txtType] = Field(default_factory=list)
    # XML attribute
    isPrimary: str | None = None
class fundAgType(simpleTextType):
    """Model for the DDI-C ``fundAgType`` complex type."""

    # XML attributes
    abbr: str | None = None
    role: str | None = None
class geoBndBoxType(baseElementType):
    """Model for the DDI-C ``geoBndBoxType`` complex type."""

    # child elements (single)
    westBL: phraseType | None = None
    eastBL: phraseType | None = None
    northBL: phraseType | None = None
    southBL: phraseType | None = None
class geoMapType(baseElementType):
    """Model for the DDI-C ``geoMapType`` complex type."""

    # XML attributes
    URI: str | None = None
    mapFormat: str | None = None
    levelno: str | None = None
class grantNoType(simpleTextType):
    """Model for the DDI-C ``grantNoType`` complex type."""

    # XML attributes
    agency: str | None = None
    role: str | None = None
class holdingsType(simpleTextType):
    """Model for the DDI-C ``holdingsType`` complex type."""

    # XML attributes
    location: str | None = None
    callno: str | None = None
    URI: str | None = None
    media: str | None = None
class IDNoType(simpleTextType):
    """Model for the DDI-C ``IDNoType`` complex type."""

    # XML attributes
    agency: str | None = None
    level: str | None = None
class instrumentDevelopmentType(simpleTextType):
    """Model for the DDI-C ``instrumentDevelopmentType`` complex type."""

    # XML attribute
    type: str | None = None
class invalrngType(baseElementType):
    """Model for the DDI-C ``invalrngType`` complex type."""

    # child elements (repeatable)
    item: list[itemType] = Field(default_factory=list)
    range: list[rangeType] = Field(default_factory=list)
    key: list[tableAndTextType] = Field(default_factory=list)
    notes: list[notesType] = Field(default_factory=list)
class itemType(baseElementType):
    """Model for the DDI-C ``itemType`` complex type."""

    # XML attributes
    UNITS: str | None = None
    VALUE: str | None = None
class keywordType(simpleTextType):
    """Model for the DDI-C ``keywordType`` complex type."""

    # XML attributes
    vocab: str | None = None
    vocabURI: str | None = None
class lablType(simpleTextType):
    """Model for the DDI-C ``lablType`` complex type."""

    # XML attributes
    level: str | None = None
    vendor: str | None = None
    country: str | None = None
    sdatrefs: str | None = None
class locationType(baseElementType):
    """Model for the DDI-C ``locationType`` complex type."""

    # XML attributes
    StartPos: str | None = None
    EndPos: str | None = None
    width: str | None = None
    RecSegNo: str | None = None
    field: str | None = None
    locMap: str | None = None
class locMapType(baseElementType):
    """Model for the DDI-C ``locMapType`` complex type."""

    # child element (repeatable)
    dataItem: list[dataItemType] = Field(default_factory=list)
class materialReferenceType(abstractTextType):
    """Model for the DDI-C ``materialReferenceType``; currently captured as text only."""

    # TODO: This element requires special handling as it
    # allows mixed content and Citation elements
    # citation: List["citationType"]
    pass
class measureType(baseElementType):
    """Model for the DDI-C ``measureType`` complex type."""

    # XML attributes
    varRef: str | None = None
    aggrMeth: str | None = None
    otherAggrMeth: str | None = None
    measUnit: str | None = None
    scale: str | None = None
    origin: str | None = None
    additivity: str | None = None
class methodType(baseElementType):
    """Model for the DDI-C ``methodType`` complex type."""

    # child elements (repeatable)
    dataColl: list[dataCollType] = Field(default_factory=list)
    notes: list[notesType] = Field(default_factory=list)
    anlyInfo: list[anlyInfoType] = Field(default_factory=list)
    stdyClas: list[stdyClasType] = Field(default_factory=list)
    dataProcessing: list[dataProcessingType] = Field(default_factory=list)
    codingInstructions: list[codingInstructionsType] = Field(default_factory=list)
class miType(phraseType):
    """Model for the DDI-C ``miType`` complex type; adds nothing to ``phraseType``."""

    pass
class mrowType(baseElementType):
    """Model for the DDI-C ``mrowType`` complex type."""

    # child element (repeatable)
    mi: list[miType] = Field(default_factory=list)
class nationType(conceptualTextType):
    """Model for the DDI-C ``nationType`` complex type."""

    # XML attribute
    abbr: str | None = None
class nCubeType(baseElementType):
    """Model for the DDI-C ``nCubeType`` complex type."""

    # child elements (repeatable)
    location: list[locationType] = Field(default_factory=list)
    labl: list[lablType] = Field(default_factory=list)
    txt: list[txtType] = Field(default_factory=list)
    universe: list[universeType] = Field(default_factory=list)
    imputation: list[simpleTextType] = Field(default_factory=list)
    security: list[simpleTextAndDateType] = Field(default_factory=list)
    embargo: list[embargoType] = Field(default_factory=list)
    respUnit: list[simpleTextType] = Field(default_factory=list)
    anlysUnit: list[simpleTextType] = Field(default_factory=list)
    verStmt: list[verStmtType] = Field(default_factory=list)
    purpose: list[purposeType] = Field(default_factory=list)
    dmns: list[dmnsType] = Field(default_factory=list)
    measure: list[measureType] = Field(default_factory=list)
    notes: list[notesType] = Field(default_factory=list)
    # XML attributes
    name: str | None = None
    sdatrefs: str | None = None
    methrefs: str | None = None
    pubrefs: str | None = None
    access: str | None = None
    dmnsQnty: str | None = None
    cellQnty: str | None = None
class nCubeGrpType(baseElementType):
    """DDI-C ``nCubeGrpType``: repeatable child elements plus string attributes."""

    # Child elements (all repeatable, empty list when absent)
    labl: list[lablType] = Field(default_factory=list)
    txt: list[txtType] = Field(default_factory=list)
    concept: list[conceptType] = Field(default_factory=list)
    defntn: list[simpleTextType] = Field(default_factory=list)
    universe: list[universeType] = Field(default_factory=list)
    notes: list[notesType] = Field(default_factory=list)

    # Attributes
    type: str | None = None
    otherType: str | None = None
    nCube: str | None = None
    nCubeGrp: str | None = None
    name: str | None = None
    sdatrefs: str | None = None
    methrefs: str | None = None
    pubrefs: str | None = None
    access: str | None = None
class notesType(tableAndTextType):
    """DDI-C ``notesType``: ``tableAndTextType`` extended with string attributes."""

    # Attributes
    type: str | None = None
    subject: str | None = None
    level: str | None = None
    resp: str | None = None
    sdatrefs: str | None = None
    parent: str | None = None
    sameNote: str | None = None
class otherMatType(baseElementType):
    """DDI-C ``otherMatType``; may nest further ``otherMat`` children."""

    # Child elements
    labl: list[lablType] = Field(default_factory=list)
    txt: list[txtType] = Field(default_factory=list)
    notes: list[notesType] = Field(default_factory=list)
    table: list[tableType] = Field(default_factory=list)
    citation: citationType | None = None  # the only non-repeatable child
    otherMat: list[otherMatType] = Field(default_factory=list)  # recursive

    # Attributes
    type: str | None = None
    level: str | None = None
    URI: str | None = None
class othrStdyMatType(baseElementType):
    """DDI-C ``othrStdyMatType``: repeatable material-reference children."""

    # Child elements (all repeatable, empty list when absent)
    relMat: list[relMatType] = Field(default_factory=list)
    relStdy: list[materialReferenceType] = Field(default_factory=list)
    relPubl: list[materialReferenceType] = Field(default_factory=list)
    # the schema defines othRefsType but it's the same as materialReferenceType
    othRefs: list[materialReferenceType] = Field(default_factory=list)
class othIdType(simpleTextType):
    """DDI-C ``othIdType``: ``simpleTextType`` extended with string attributes."""

    # Attributes
    type: str | None = None
    role: str | None = None
    affiliation: str | None = None
class participantType(stringType):
    """DDI-C ``participantType``: ``stringType`` extended with string attributes."""

    # Attributes
    affiliation: str | None = None
    abbr: str | None = None
    role: str | None = None
class physLocType(baseElementType):
    """DDI-C ``physLocType``: declares string-valued attributes only."""

    # Attributes (kept as raw strings; no numeric conversion is done here)
    type: str | None = None
    recRef: str | None = None
    startPos: str | None = None
    width: str | None = None
    endPos: str | None = None
class pointType(baseElementType):
    """DDI-C ``pointType``: optional ``gringLat`` / ``gringLon`` children."""

    # Child elements (non-repeatable, None when absent)
    gringLat: phraseType | None = None
    gringLon: phraseType | None = None
class polygonType(baseElementType):
    """DDI-C ``polygonType``: holds a repeatable list of ``point`` children."""

    # Child elements
    point: list[pointType] = Field(default_factory=list)
class prodStmtType(baseElementType):
    """DDI-C ``prodStmtType``: a container of repeatable child elements."""

    # Child elements (all repeatable, empty list when absent)
    producer: list[producerType] = Field(default_factory=list)
    copyright: list[simpleTextType] = Field(default_factory=list)
    prodDate: list[simpleTextAndDateType] = Field(default_factory=list)
    prodPlace: list[simpleTextType] = Field(default_factory=list)
    software: list[softwareType] = Field(default_factory=list)
    fundAg: list[fundAgType] = Field(default_factory=list)
    grantNo: list[grantNoType] = Field(default_factory=list)
class producerType(simpleTextType):
    """DDI-C ``producerType``: ``simpleTextType`` extended with string attributes."""

    # Attributes
    abbr: str | None = None
    affiliation: str | None = None
    role: str | None = None
class purposeType(simpleTextType):
    """DDI-C ``purposeType``: ``simpleTextType`` extended with string attributes."""

    # Attributes
    sdatrefs: str | None = None
    methrefs: str | None = None
    pubrefs: str | None = None
    URI: str | None = None
class qualityStatementType(baseElementType):
    """DDI-C ``qualityStatementType``: a container of repeatable child elements."""

    # Child elements (all repeatable, empty list when absent)
    standardsCompliance: list[standardsComplianceType] = Field(default_factory=list)
    otherQualityStatement: list[simpleTextType] = Field(default_factory=list)
class qstnType(baseElementType):
    """DDI-C ``qstnType``: optional single child elements plus string attributes."""

    # Child elements (each modelled as non-repeatable, None when absent)
    preQTxt: simpleTextType | None = None
    qstnLit: qstnLitType | None = None
    postQTxt: simpleTextType | None = None
    forward: forwardType | None = None
    backward: backwardType | None = None
    ivuInstr: simpleTextType | None = None

    # Attributes
    qstn: str | None = None
    var: str | None = None
    seqNo: str | None = None
    sdatrefs: str | None = None
    responseDomainType: str | None = None
    otherResponseDomainType: str | None = None
class qstnLitType(simpleTextType):
    """DDI-C ``qstnLitType``: ``simpleTextType`` extended with string attributes."""

    # Attributes
    callno: str | None = None
    label: str | None = None
    media: str | None = None
    type: str | None = None
class rangeType(baseElementType):
    """DDI-C ``rangeType``: declares string-valued attributes only."""

    # Attributes (bounds are kept as raw strings, not parsed to numbers)
    UNITS: str | None = None
    min: str | None = None
    minExclusive: str | None = None
    max: str | None = None
    maxExclusive: str | None = None
class recDimnsnType(baseElementType):
    """DDI-C ``recDimnsnType``: optional single children plus a ``level`` attribute."""

    # Child elements (non-repeatable, None when absent)
    varQnty: simpleTextType | None = None
    caseQnty: simpleTextType | None = None
    logRecL: simpleTextType | None = None

    # Attributes
    level: str | None = None
class recGrpType(baseElementType):
    """DDI-C ``recGrpType``: labels, an optional ``recDimnsn`` and string attributes."""

    # Child elements
    labl: list[lablType] = Field(default_factory=list)
    recDimnsn: recDimnsnType | None = None

    # Attributes
    recGrp: str | None = None
    rectype: str | None = None
    keyvar: str | None = None
    rtypeloc: str | None = None
    type: str | None = None
class relMatType(materialReferenceType):
    """DDI-C ``relMatType``: ``materialReferenceType`` plus a ``sdatrefs`` attribute."""

    # Attributes
    sdatrefs: str | None = None
class resInstruType(conceptualTextType):
    """DDI-C ``resInstruType``: ``conceptualTextType`` plus a ``type`` attribute."""

    # Attributes
    type: str | None = None
class resourceType(baseElementType):
    """DDI-C ``resourceType``: a container of repeatable source-description children.

    Property names have to match the DDI-C element names for the XML loader
    to populate them. The element is ``srcOrig`` (as in ``sourcesType``); the
    previous spelling ``srgOrig`` matched no element and so stayed empty.
    """

    # Child elements (all repeatable, empty list when absent)
    dataSrc: list[simpleTextType] = Field(default_factory=list)
    srcOrig: list[conceptualTextType] = Field(default_factory=list)
    srcChar: list[simpleTextType] = Field(default_factory=list)
    srcDocu: list[simpleTextType] = Field(default_factory=list)

    @property
    def srgOrig(self) -> list[conceptualTextType]:
        """Deprecated read-only alias of ``srcOrig`` (old misspelled name)."""
        return self.srcOrig
class rspStmtType(baseElementType):
    """DDI-C ``rspStmtType``: a container of repeatable child elements."""

    # Child elements (all repeatable, empty list when absent)
    AuthEnty: list[AuthEntyType] = Field(default_factory=list)
    othId: list[othIdType] = Field(default_factory=list)
class sampleFrameType(baseElementType):
    """DDI-C ``sampleFrameType``: a container of repeatable child elements."""

    # Child elements (all repeatable, empty list when absent)
    sampleFrameName: list[stringType] = Field(default_factory=list)
    labl: list[lablType] = Field(default_factory=list)
    txt: list[txtType] = Field(default_factory=list)
    validPeriod: list[eventDateType] = Field(default_factory=list)
    custodian: list[custodianType] = Field(default_factory=list)
    useStmt: list[useStmtType] = Field(default_factory=list)
    universe: list[universeType] = Field(default_factory=list)
    frameUnit: list[frameUnitType] = Field(default_factory=list)
    referencePeriod: list[eventDateType] = Field(default_factory=list)
    updateProcedure: list[simpleTextType] = Field(default_factory=list)
class selectorType(stringType):
    """DDI-C ``selector`` value; identical to ``stringType`` (no extra properties)."""

    # note: this is a xs:string in the schema (on usageType)
class serNameType(simpleTextType):
    """DDI-C ``serNameType``: ``simpleTextType`` plus an ``abbr`` attribute."""

    # Attributes
    abbr: str | None = None
class serStmtType(baseElementType):
    """DDI-C ``serStmtType``: repeatable child elements plus a ``URI`` attribute."""

    # Child elements (all repeatable, empty list when absent)
    serName: list[serNameType] = Field(default_factory=list)
    serInfo: list[simpleTextType] = Field(default_factory=list)

    # Attributes
    URI: str | None = None
class setAvailType(baseElementType):
    """DDI-C ``setAvailType``: a container of repeatable child elements."""

    # Child elements (all repeatable, empty list when absent)
    accsPlac: list[accsPlacType] = Field(default_factory=list)
    origArch: list[simpleTextType] = Field(default_factory=list)
    avlStatus: list[simpleTextType] = Field(default_factory=list)
    collSize: list[simpleTextType] = Field(default_factory=list)
    complete: list[simpleTextType] = Field(default_factory=list)
    fileQnty: list[simpleTextType] = Field(default_factory=list)
    notes: list[notesType] = Field(default_factory=list)
class softwareType(simpleTextAndDateType):
    """DDI-C ``softwareType``: ``simpleTextAndDateType`` plus a ``version`` attribute."""

    # Attributes
    version: str | None = None
class sourcesType(baseElementType):
    """DDI-C ``sourcesType``; may nest further ``sources`` children."""

    # Child elements (all repeatable, empty list when absent)
    dataSrc: list[simpleTextType] = Field(default_factory=list)
    sourceCitation: list[citationType] = Field(default_factory=list)
    srcOrig: list[conceptualTextType] = Field(default_factory=list)
    srcChar: list[simpleTextType] = Field(default_factory=list)
    srcDocu: list[simpleTextType] = Field(default_factory=list)
    sources: list[sourcesType] = Field(default_factory=list)  # recursive
class specificElementType(stringType):
    """DDI-C ``specificElementType``: ``stringType`` extended with string attributes."""

    # Attributes
    refs: str | None = None
    authorizedCodeValue: str | None = None
class specPermType(simpleTextType):
    """DDI-C ``specPermType``: ``simpleTextType`` extended with string attributes."""

    # Attributes
    required: str | None = None
    formNo: str | None = None
    URI: str | None = None
class standardType(baseElementType):
    """DDI-C ``standardType``: a container of repeatable child elements."""

    # Child elements (all repeatable, empty list when absent)
    standardName: list[standardNameType] = Field(default_factory=list)
    producer: list[producerType] = Field(default_factory=list)
class standardsComplianceType(baseElementType):
    """DDI-C ``standardsComplianceType``: one ``standard`` plus its descriptions."""

    # Child elements
    standard: standardType | None = None  # non-repeatable, None when absent
    complianceDescription: list[simpleTextType] = Field(default_factory=list)
class standardNameType(stringType):
    """DDI-C ``standardNameType``: ``stringType`` extended with string attributes."""

    # Attributes
    date: str | None = None
    version: str | None = None
    URI: str | None = None
class stdCatgryType(simpleTextAndDateType):
    """DDI-C ``stdCatgryType``: ``simpleTextAndDateType`` plus a ``URI`` attribute."""

    # Attributes
    URI: str | None = None
class stdyClasType(simpleTextType):
    """DDI-C ``stdyClasType``: ``simpleTextType`` plus a ``type`` attribute."""

    # Attributes
    type: str | None = None
class stdyDscrType(baseElementType):
    """DDI-C ``stdyDscrType``: repeatable child elements plus an ``access`` attribute."""

    # Child elements (all repeatable, empty list when absent)
    citation: list[citationType] = Field(default_factory=list)
    studyAuthorization: list[studyAuthorizationType] = Field(default_factory=list)
    stdyInfo: list[stdyInfoType] = Field(default_factory=list)
    studyDevelopment: list[studyDevelopmentType] = Field(default_factory=list)
    method: list[methodType] = Field(default_factory=list)
    dataAccs: list[dataAccsType] = Field(default_factory=list)
    othrStdyMat: list[othrStdyMatType] = Field(default_factory=list)
    notes: list[notesType] = Field(default_factory=list)

    # Attributes
    access: str | None = None
class stdyInfoType(baseElementType):
    """DDI-C ``stdyInfoType``: a container of repeatable child elements."""

    # Child elements (all repeatable, empty list when absent)
    studyBudget: list[simpleTextType] = Field(default_factory=list)
    subject: list[subjectType] = Field(default_factory=list)
    abstract: list[abstractType] = Field(default_factory=list)
    sumDscr: list[sumDscrType] = Field(default_factory=list)
    qualityStatement: list[qualityStatementType] = Field(default_factory=list)
    notes: list[notesType] = Field(default_factory=list)
    exPostEvaluation: list[exPostEvaluationType] = Field(default_factory=list)
class studyAuthorizationType(baseElementType):
    """DDI-C ``studyAuthorizationType``: authorizing agencies, statements and a date.

    Property names have to match the DDI-C element names for the XML loader
    to populate them. The statement element is ``authorizationStatement``;
    the previous spelling ``authorzingStatement`` matched no element and so
    stayed empty.
    """

    # Child elements (all repeatable, empty list when absent)
    authorizingAgency: list[authorizingAgencyType] = Field(default_factory=list)
    authorizationStatement: list[simpleTextType] = Field(default_factory=list)

    # Attributes
    date: str | None = None

    @property
    def authorzingStatement(self) -> list[simpleTextType]:
        """Deprecated read-only alias of ``authorizationStatement`` (old misspelled name)."""
        return self.authorizationStatement
class studyDevelopmentType(baseElementType):
    """DDI-C ``studyDevelopmentType``: a list of ``developmentActivity`` children."""

    # Child elements
    developmentActivity: list[developmentActivityType] = Field(default_factory=list)
class subjectType(baseElementType):
    """DDI-C ``subjectType``: holds ``keyword`` and ``topcClas`` children.

    Property names have to match the DDI-C element names for the XML loader
    to populate them. The element is ``topcClas`` (single "s", like its type
    ``topcClasType``); the previous spelling ``topcClass`` matched no element
    and so stayed empty.
    """

    # Child elements (all repeatable, empty list when absent)
    keyword: list[keywordType] = Field(default_factory=list)
    topcClas: list[topcClasType] = Field(default_factory=list)

    @property
    def topcClass(self) -> list[topcClasType]:
        """Deprecated read-only alias of ``topcClas`` (old misspelled name)."""
        return self.topcClas
class sumDscrType(baseElementType):
    """DDI-C ``sumDscrType``: a container of repeatable child elements."""

    # Child elements (all repeatable, empty list when absent)
    timePrd: list[timePrdType] = Field(default_factory=list)
    collDate: list[collDateType] = Field(default_factory=list)
    nation: list[nationType] = Field(default_factory=list)
    geogCover: list[conceptualTextType] = Field(default_factory=list)
    geogUnit: list[conceptualTextType] = Field(default_factory=list)
    geoBndBox: list[geoBndBoxType] = Field(default_factory=list)
    boundPoly: list[boundPolyType] = Field(default_factory=list)
    anlyUnit: list[anlyUnitType] = Field(default_factory=list)
    universe: list[universeType] = Field(default_factory=list)
    dataKind: list[dataKindType] = Field(default_factory=list)
class sumStatType(simpleTextType):
    """DDI-C ``sumStatType``: ``simpleTextType`` extended with string attributes."""

    # Attributes
    wgtd: str | None = None
    # "wgt-var" is not a valid Python identifier, hence the alias.
    wgt_var: str | None = Field(None, alias="wgt-var")
    weight: str | None = None
    type: str | None = None
    otherType: str | None = None
class titlStmtType(baseElementType):
    """DDI-C ``titlStmtType``: one optional ``titl`` plus repeatable title/ID children."""

    # Child elements
    titl: simpleTextType | None = None  # non-repeatable, None when absent
    subTitl: list[simpleTextType] = Field(default_factory=list)
    altTitl: list[simpleTextType] = Field(default_factory=list)
    parTitl: list[simpleTextType] = Field(default_factory=list)
    IDNo: list[IDNoType] = Field(default_factory=list)
class timeMethType(conceptualTextType):
    """DDI-C ``timeMethType``: ``conceptualTextType`` plus a ``method`` attribute."""

    # Attributes
    method: str | None = None
class timePrdType(simpleTextAndDateType):
    """DDI-C ``timePrdType``: ``simpleTextAndDateType`` extended with string attributes."""

    # Attributes
    event: str | None = None
    cycle: str | None = None
class topcClasType(simpleTextType):
    """DDI-C ``topcClasType``: ``simpleTextType`` extended with string attributes."""

    # Attributes
    vocab: str | None = None
    vocabURI: str | None = None
class universeType(conceptualTextType):
    """DDI-C ``universeType``: ``conceptualTextType`` extended with string attributes."""

    # Attributes
    level: str | None = None
    clusion: str | None = None
class unitTypeType(stringType):
    """DDI-C ``unitTypeType``: ``stringType`` plus a ``numberOfUnits`` attribute."""

    # Attributes
    numberOfUnits: str | None = None
class usageType(baseElementType):
    """DDI-C ``usageType``: optional ``selector``, ``specificElement`` and ``attribute``."""

    # Note: this does not derive from baseElementType in the schema

    # Child elements (non-repeatable, None when absent)
    selector: selectorType | None = None
    specificElement: specificElementType | None = None
    attribute: attributeType | None = None
class useStmtType(baseElementType):
    """DDI-C ``useStmtType``: a container of repeatable child elements."""

    # Child elements (all repeatable, empty list when absent)
    confDec: list[confDecType] = Field(default_factory=list)
    specPerm: list[specPermType] = Field(default_factory=list)
    restrctn: list[simpleTextType] = Field(default_factory=list)
    contact: list[contactType] = Field(default_factory=list)
    citReq: list[simpleTextType] = Field(default_factory=list)
    deposReq: list[simpleTextType] = Field(default_factory=list)
    conditions: list[simpleTextType] = Field(default_factory=list)
    disclaimer: list[simpleTextType] = Field(default_factory=list)
class valrngType(baseElementType):
    """DDI-C ``valrngType``: a container of repeatable child elements."""

    # Child elements (all repeatable, empty list when absent)
    item: list[itemType] = Field(default_factory=list)
    range: list[rangeType] = Field(default_factory=list)
    key: list[tableAndTextType] = Field(default_factory=list)
    notes: list[notesType] = Field(default_factory=list)
class varType(baseElementType):
    """DDI-C ``varType``: child elements, string attributes and category helpers.

    Besides the declared properties, the class offers category counters
    (``n_catgry``, ``n_missing_catgry``, ``n_non_missing_catgry``) and small
    accessors for the first label and the name.
    """

    # Child elements (repeatable unless noted, empty list when absent)
    location: list[locationType] = Field(default_factory=list)
    labl: list[lablType] = Field(default_factory=list)
    imputation: list[simpleTextType] = Field(default_factory=list)
    security: list[simpleTextAndDateType] = Field(default_factory=list)
    embargo: list[embargoType] = Field(default_factory=list)
    respUnit: list[simpleTextType] = Field(default_factory=list)
    anlysUnit: list[conceptualTextType] = Field(default_factory=list)
    qstn: list[qstnType] = Field(default_factory=list)
    valrng: list[valrngType] = Field(default_factory=list)
    invalrng: list[invalrngType] = Field(default_factory=list)
    undocCod: list[simpleTextType] = Field(default_factory=list)
    universe: list[universeType] = Field(default_factory=list)
    totlresp: list[simpleTextType] = Field(default_factory=list)
    sumStat: list[sumStatType] = Field(default_factory=list)
    txt: list[txtType] = Field(default_factory=list)
    stdCatgry: list[stdCatgryType] = Field(default_factory=list)
    catgryGrp: list[catgryGrpType] = Field(default_factory=list)
    catgry: list[catgryType] = Field(default_factory=list)
    codInstr: list[simpleTextType] = Field(default_factory=list)
    verStmt: list[verStmtType] = Field(default_factory=list)
    concept: list[conceptType] = Field(default_factory=list)
    derivation: derivationType | None = None  # non-repeatable
    varFormat: varFormatType | None = None  # non-repeatable
    geoMap: list[geoMapType] = Field(default_factory=list)
    catLevel: list[catLevelType] = Field(default_factory=list)
    notes: list[notesType] = Field(default_factory=list)

    # Attributes
    name: str | None = None
    wgt: str | None = None
    # "wgt-var" is not a valid Python identifier, hence the alias.
    wgt_var: str | None = Field(None, alias="wgt-var")
    weight: str | None = None
    var_qstn: str | None = Field(None, alias="qstn")  # qstn attribute vs qstn element
    files: str | None = None
    vendor: str | None = None
    dcml: str | None = None
    intrvl: str | None = None
    rectype: str | None = None
    sdatrefs: str | None = None
    methrefs: str | None = None
    pubrefs: str | None = None
    access: str | None = None
    aggrMeth: str | None = None
    # NOTE(review): measureType spells the matching attribute "otherAggrMeth";
    # confirm which spelling the schema uses for var.
    othAggrMeth: str | None = None
    scale: str | None = None
    origin: str | None = None
    nature: str | None = None
    additivity: str | None = None
    otherAdditivity: str | None = None
    temporal: str | None = None
    geog: str | None = None
    geoVocab: str | None = None
    catQnty: str | None = None
    representationType: str | None = None
    otherRepresentationType: str | None = None

    @property
    def n_catgry(self) -> int:
        """Number of ``catgry`` entries (0 when there are none)."""
        categories = getattr(self, "catgry", None)
        return len(categories) if categories else 0

    @property
    def n_missing_catgry(self) -> int:
        """Number of ``catgry`` entries whose ``is_missing`` is truthy."""
        if self.n_catgry == 0:
            return 0
        return sum(1 for entry in self.catgry if entry.is_missing)

    @property
    def n_non_missing_catgry(self) -> int:
        """Number of ``catgry`` entries that are not counted as missing."""
        return self.n_catgry - self.n_missing_catgry

    def get_catgry_checksum(
        self, _include_code: bool = True, _include_label: bool = True, _method: Any | None = None
    ) -> str:
        """Placeholder for a category checksum; always returns an empty string.

        The parameters are accepted but not used yet.
        """
        # TODO: compute checksum for catgry
        return ""

    def get_label(self):
        """Return the content of the first ``labl`` as a string, or None if there is none."""
        if not self.labl:
            return None
        first_label = self.labl[0]
        return str(first_label.content)

    def get_name(self):
        """Return the ``name`` attribute (may be None)."""
        return self.name
class varFormatType(simpleTextType):
    """DDI-C ``varFormatType``: ``simpleTextType`` extended with string attributes."""

    # Attributes
    type: str | None = None
    formatname: str | None = None
    # Stored as ``schema_`` and aliased to "schema"; presumably renamed to
    # avoid clashing with pydantic's ``BaseModel.schema`` - confirm.
    schema_: str | None = Field(None, alias="schema")
    otherSchema: str | None = None
class varGrpType(baseElementType):
    """DDI-C ``varGrpType``: repeatable child elements plus string attributes."""

    # Child elements (all repeatable, empty list when absent)
    labl: list[lablType] = Field(default_factory=list)
    txt: list[txtType] = Field(default_factory=list)
    concept: list[conceptType] = Field(default_factory=list)
    defntn: list[simpleTextType] = Field(default_factory=list)
    universe: list[universeType] = Field(default_factory=list)
    notes: list[notesType] = Field(default_factory=list)

    # Attributes
    type: str | None = None
    otherType: str | None = None
    var: str | None = None
    varGrp: str | None = None
    name: str | None = None
    sdatrefs: str | None = None
    methrefs: str | None = None
    pubrefs: str | None = None
    access: str | None = None
    nCube: str | None = None
class verStmtType(baseElementType):
    """DDI-C ``verStmtType``: a container of repeatable child elements."""

    # Child elements (all repeatable, empty list when absent)
    version: list[versionType] = Field(default_factory=list)
    verResp: list[verRespType] = Field(default_factory=list)
    notes: list[notesType] = Field(default_factory=list)
class versionType(simpleTextAndDateType):
    """DDI-C ``versionType``: ``simpleTextAndDateType`` plus a ``type`` attribute."""

    # Attributes
    type: str | None = None
class verRespType(simpleTextType):
    """DDI-C ``verRespType``: ``simpleTextType`` plus an ``affiliation`` attribute."""

    # Attributes
    affiliation: str | None = None