"""Classes for Python objects derived from CSV files."""

import re
from dataclasses import dataclass, field
from typing import List
from dctap.utils import coerce_integer, coerce_numeric, looks_like_uri_or_curie


@dataclass
class TAPStatementTemplate:
    """Instances hold TAP/CSV elements related to statement templates.

    All TAP elements start out as strings (empty string meaning "not given").
    Calling normalize() may replace some of them with normalized strings,
    numbers or lists, and records any problems found in self.state_warns,
    keyed by element name.
    """

    # pylint: disable=too-many-instance-attributes
    # It's a dataclass, right?
    # pylint: disable=invalid-name
    # for elements not named in snake case.

    propertyID: str = ""
    propertyLabel: str = ""
    mandatory: str = ""
    repeatable: str = ""
    valueNodeType: str = ""
    valueDataType: str = ""
    valueConstraint: str = ""
    valueConstraintType: str = ""
    valueShape: str = ""
    note: str = ""
    # Warnings collected by the normalization helpers, keyed by element name.
    state_warns: dict = field(default_factory=dict)
    # Not read or written by any method of this class.
    state_extras: dict = field(default_factory=dict)

    def normalize(self, config_dict):
        """Normalizes specific fields and collects warnings.

        Args:
            config_dict: Settings dictionary. The helpers called here read the
                keys "picklist_item_separator", "extra_value_node_types" and
                "picklist_elements"; all are optional.

        Returns:
            self, so that calls can be chained.
        """
        # pylint: disable=attribute-defined-outside-init
        # NOTE(review): _warn_if_value_not_urilike() and
        # _valueDataType_warn_if_used_with_valueShape() are defined below but
        # are not invoked here - confirm whether that is intentional.
        self._normalize_booleans()
        self._valueConstraintType_pattern_warn_if_valueConstraint_not_valid_regex()
        self._valueConstraintType_pattern_warn_if_used_with_value_shape()
        self._valueConstraintType_iristem_parse()
        self._valueConstraintType_iristem_warn_if_list_items_not_IRIs()
        self._valueConstraintType_languageTag_parse(config_dict)
        self._valueConstraintType_minmaxlength_warn_if_not_nonnegative_integer()
        self._valueConstraintType_minmaxinclusive_parse()
        self._valueConstraintType_minmaxinclusive_warn_if_value_not_numeric()
        self._valueConstraintType_warn_if_used_without_valueConstraint()
        self._valueDataType_warn_if_used_with_valueNodeType_IRI()
        self._valueNodeType_warn_if_valueNodeType_literal_used_with_any_valueShape()
        self._valueConstraintType_picklist_parse(config_dict)
        self._valueNodeType_is_from_enumerated_list(config_dict)
        self._parse_elements_configured_as_picklist_elements(config_dict)
        return self

    def _warn_if_value_not_urilike(self):
        """Warns when values of given elements do not look like URIs."""
        elements_that_may_take_uris = ["propertyID", "valueDataType", "valueShape"]
        for elem in elements_that_may_take_uris:
            value = getattr(self, elem)
            # Empty values are simply "not given" and are never warned about.
            if value and not looks_like_uri_or_curie(value):
                self.state_warns[elem] = f"Value '{value}' does not look like a URI."
        return self

    def _normalize_booleans(self):
        """Coerces supported Boolean values to 'true' or 'false' or leaves unchanged.

        Unsupported non-empty values are left as they are and produce a warning.
        """
        valid_values_for_true = ["true", "TRUE", "True", "1"]
        valid_values_for_false = ["false", "FALSE", "False", "0"]
        boolean_elements = ["mandatory", "repeatable"]
        for elem in boolean_elements:
            value = getattr(self, elem)
            if not value:
                continue
            if value in valid_values_for_true:
                setattr(self, elem, "true")
            elif value in valid_values_for_false:
                setattr(self, elem, "false")
            else:
                self.state_warns[elem] = (
                    f"'{value}' is not a supported Boolean value."
                )
        return self

    def _valueConstraintType_iristem_parse(self):
        """If valueConstraintType is Iristem, split valueConstraint on whitespace."""
        self.valueConstraintType = self.valueConstraintType.lower()
        if self.valueConstraintType == "iristem":
            # Only split strings, so that an already-parsed list survives a
            # second normalization pass instead of raising AttributeError.
            if isinstance(self.valueConstraint, str) and self.valueConstraint:
                self.valueConstraint = self.valueConstraint.split()
        return self

    def _valueConstraintType_iristem_warn_if_list_items_not_IRIs(self):
        """If IRIStem, warn if valueConstraint list items do not look like IRIs."""
        self.valueConstraintType = self.valueConstraintType.lower()
        if self.valueConstraintType == "iristem":
            for list_item in self.valueConstraint:
                if not looks_like_uri_or_curie(list_item):
                    # If several items fail, the last one wins (same dict key).
                    self.state_warns["valueConstraint"] = (
                        f"Value constraint type is '{self.valueConstraintType}', "
                        f"but '{list_item}' does not look like an IRI or "
                        "Compact IRI."
                    )
        return self

    def _valueConstraintType_minmaxlength_warn_if_not_nonnegative_integer(self):
        """
        For minLength/maxLength, tries to coerce valueConstraint to integer.
        Warns if the resulting valueConstraint is not a nonnegative integer.

        Constraints of any other type are left untouched: coercing them here
        would, for example, turn a one-item picklist "5" into the integer 5
        and break the later split into list items.
        """
        vctype = self.valueConstraintType.lower()
        if vctype not in ("minlength", "maxlength"):
            return self
        vc = self.valueConstraint = coerce_integer(self.valueConstraint)
        # Zero is an acceptable length; only negatives and non-integers warn.
        if not isinstance(vc, int) or vc < 0:
            self.state_warns["valueConstraint"] = (
                f"Value constraint type is '{self.valueConstraintType}', "
                f"but '{self.valueConstraint}' is not a nonnegative integer."
            )
        return self

    def _valueConstraintType_minmaxinclusive_parse(self):
        """
        If value of valueConstraintType is 'minInclusive' or 'maxInclusive',
        value of valueConstraint should be numeric (int or float).
        """
        self.valueConstraintType = self.valueConstraintType.lower()
        value_constraint = self.valueConstraint
        if self.valueConstraintType in ("mininclusive", "maxinclusive"):
            if value_constraint:
                self.valueConstraint = coerce_numeric(value_constraint)
        return self

    def _valueConstraintType_minmaxinclusive_warn_if_value_not_numeric(self):
        """Warns if valueConstraint for min/maxInclusive not coercible to float."""
        self.valueConstraintType = self.valueConstraintType.lower()
        if self.valueConstraintType in ("mininclusive", "maxinclusive"):
            try:
                float(self.valueConstraint)
            except (ValueError, TypeError):
                self.state_warns["valueConstraint"] = (
                    f"Value constraint type is '{self.valueConstraintType}', "
                    f"but '{self.valueConstraint}' is not numeric."
                )
        return self

    def _valueConstraintType_pattern_warn_if_valueConstraint_not_valid_regex(self):
        """If valueConstraintType Pattern, warn if valueConstraint not valid regex."""
        self.valueConstraintType = self.valueConstraintType.lower()
        if self.valueConstraintType == "pattern":
            try:
                re.compile(self.valueConstraint)
            except (re.error, TypeError):
                self.state_warns["valueConstraint"] = (
                    f"Value constraint type is '{self.valueConstraintType}', but "
                    f"'{self.valueConstraint}' is not a valid regular expression."
                )
        return self

    def _valueConstraintType_pattern_warn_if_used_with_value_shape(self):
        """Regular expressions cannot conform to value shapes."""
        self.valueConstraintType = self.valueConstraintType.lower()
        if self.valueConstraintType == "pattern":
            if self.valueShape:
                self.state_warns["valueConstraintType"] = (
                    f"Values of constraint type '{self.valueConstraintType}' "
                    "cannot conform to a value shape."
                )
        # Return self like every other helper, so calls can be chained.
        return self

    def _valueConstraintType_languageTag_parse(self, config_dict):
        """For valueConstraintType languageTag, splits valueConstraint into a list.

        Items are separated by config_dict["picklist_item_separator"] (a single
        space if the key is absent), stripped, and empty items are dropped.
        """
        self.valueConstraintType = self.valueConstraintType.lower()
        sep = config_dict.get("picklist_item_separator", " ")
        if self.valueConstraintType == "languagetag":
            # Only split strings, so an already-parsed list is left alone.
            if isinstance(self.valueConstraint, str) and self.valueConstraint:
                self.valueConstraint = self.valueConstraint.split(sep)
                self.valueConstraint = [x.strip() for x in self.valueConstraint if x]
        return self

    def _valueConstraintType_warn_if_used_without_valueConstraint(self):
        """Warns if valueConstraintType used without valueConstraint."""
        if self.valueConstraintType:
            vc = self.valueConstraint
            # By this point the value may already have been coerced to a
            # number; a numeric zero (e.g. minLength 0) is a real constraint,
            # not a missing one, even though it is falsy.
            is_number = isinstance(vc, (int, float)) and not isinstance(vc, bool)
            if not vc and not is_number:
                self.state_warns["valueConstraint"] = (
                    f"Value constraint type '{self.valueConstraintType}' "
                    "has no corresponding value constraint."
                )
        return self

    def _valueConstraintType_picklist_parse(self, config_dict):
        """If valueConstraintType is Picklist, split valueConstraint into a list.

        Items are separated by config_dict["picklist_item_separator"] (a single
        space if the key is absent), stripped, and empty items are dropped.
        """
        self.valueConstraintType = self.valueConstraintType.lower()
        sep = config_dict.get("picklist_item_separator", " ")
        if self.valueConstraintType == "picklist":
            # Only split strings, so an already-parsed list is left alone.
            if isinstance(self.valueConstraint, str) and self.valueConstraint:
                self.valueConstraint = self.valueConstraint.split(sep)
                self.valueConstraint = [x.strip() for x in self.valueConstraint if x]
        return self

    def _valueNodeType_is_from_enumerated_list(self, config_dict):
        """Take valueNodeType from configurable enumerated list, case-insensitive.

        The built-in node types can be extended through the optional
        config_dict["extra_value_node_types"] list.
        """
        # Built before lowercasing so the message shows the value as given.
        warning = f"'{self.valueNodeType}' is not a valid node type."
        valid_types = ["iri", "bnode", "literal"]
        # This should be moved out to defaults dictionary.
        if config_dict.get("extra_value_node_types"):
            valid_types += [v.lower() for v in config_dict["extra_value_node_types"]]
        if self.valueNodeType:
            self.valueNodeType = self.valueNodeType.lower()
            if self.valueNodeType not in valid_types:
                self.state_warns["valueNodeType"] = warning
        return self

    def _valueNodeType_warn_if_valueNodeType_literal_used_with_any_valueShape(self):
        """Value with node type Literal cannot conform to a value shape."""
        warning = "Values of node type 'literal' cannot conform to value shapes."
        self.valueNodeType = self.valueNodeType.lower()
        if self.valueShape:
            if self.valueNodeType == "literal":
                # NOTE(review): the warning is filed under "valueDataType"
                # although it concerns valueNodeType; key kept unchanged
                # because callers may rely on it - confirm.
                self.state_warns["valueDataType"] = warning
        return self

    def _valueDataType_warn_if_used_with_valueShape(self):
        """Value with any datatype cannot conform to a value shape."""
        warning = "Values with datatypes (literals) cannot conform to value shapes."
        if self.valueShape:
            if self.valueDataType:
                self.state_warns["valueDataType"] = warning
        return self

    def _valueDataType_warn_if_used_with_valueNodeType_IRI(self):
        """Value with datatype implies Literal and cannot be node type IRI."""
        node_type = self.valueNodeType
        data_type = self.valueDataType
        # Built before lowercasing so the message shows the values as given.
        warning = f"Datatype '{data_type}' incompatible with node type '{node_type}'."
        node_type = self.valueNodeType.lower()
        if node_type in ("iri", "uri", "bnode"):
            if self.valueDataType:
                self.state_warns["valueDataType"] = warning
        return self

    def _parse_elements_configured_as_picklist_elements(self, config_dict):
        """Parse elements configured as list elements.

        Every element named in config_dict["picklist_elements"] that holds a
        non-empty string is split on config_dict["picklist_item_separator"]
        (a single space if unset or empty).
        """
        separator = config_dict.get("picklist_item_separator") or " "
        picklist_elements = config_dict.get("picklist_elements") or []
        for element in picklist_elements:
            value = getattr(self, element)
            # Skip values that are not strings (e.g. a valueConstraint that an
            # earlier step already turned into a list or a number); calling
            # .split() on them would raise AttributeError.
            if isinstance(value, str) and value:
                setattr(self, element, value.split(separator))
        return self

    def get_warnings(self):
        """Emit self.state_warns as populated by self.normalize().

        Returns a shallow copy, so callers cannot alter the instance's state.
        """
        return dict(self.state_warns)


@dataclass
class TAPShape:
    """An instance holds TAP/CSV row elements related to one given, named shape."""

    # pylint: disable=invalid-name
    # True that propertyID, etc, do not conform to snake-case naming style.

    shapeID: str = ""
    shapeLabel: str = ""
    # Statement templates belonging to this shape.
    state_list: List[TAPStatementTemplate] = field(default_factory=list)
    # Warnings collected by normalize(), keyed by element name.
    shape_warns: dict = field(default_factory=dict)
    # Not read or written by any method of this class.
    shape_extras: dict = field(default_factory=dict)

    def normalize(self, config_dict):
        """Normalize values where required.

        Args:
            config_dict: Settings dictionary; the optional key
                "default_shape_identifier" is read here.

        Returns:
            self, so that calls can be chained.
        """
        self._normalize_default_shapeID(config_dict)
        self._warn_if_value_not_urilike()
        return self

    def _normalize_default_shapeID(self, config_dict):
        """If shapeID not specified, looks first in config, else sets "default"."""
        if not self.shapeID:
            self.shapeID = config_dict.get("default_shape_identifier", "default")
        return self

    def _warn_if_value_not_urilike(self):
        """Warns when values of given elements do not look like URIs."""
        elements_that_may_take_uris = ["shapeID"]
        for elem in elements_that_may_take_uris:
            value = getattr(self, elem)
            # Empty values are simply "not given" and are never warned about.
            if value and not looks_like_uri_or_curie(value):
                self.shape_warns[elem] = f"Value '{value}' does not look like a URI."
        return self

    def get_warnings(self):
        """Emit warnings dictionary self.shape_warns, populated by normalize() method.

        Returns a shallow copy, so callers cannot alter the instance's state.
        """
        return dict(self.shape_warns)