# Source code for gnss_product_management.specifications.format.spec
"""Raw Pydantic models and registry for format specifications.

Two distinct groups of types live here:

``FormatSpec`` / ``FormatVersionSpec`` / ``FormatFieldDef`` / ``FormatSpecCollection``
    Describe the *shape* of a file format as a reusable library entry — e.g.
    "RINEX v3 has these metadata fields and these filename templates".
    Loaded from the ``formats:`` section of the product spec YAML.

``FormatRegistry``
    Validates and indexes a :class:`FormatSpecCollection` against a
    :class:`ParameterCatalog`, providing ``get_format()`` / ``get_version()``
    look-ups.

See :mod:`.format_spec` for the product-facing catalog that *resolves*
format-variant bindings into
:class:`~gnss_product_management.specifications.products.product.Product`
objects.
"""
from __future__ import annotations
import re
from pathlib import Path
import yaml
from pydantic import BaseModel, ConfigDict, Field, field_validator
from gnss_product_management.specifications.parameters.parameter import ParameterCatalog
[docs]
class FormatFieldDef(BaseModel):
    """Optional properties of one metadata field inside a format version.

    Every attribute may be omitted. A metadata field can also be declared
    with no definition at all (``None``) in
    :attr:`FormatVersionSpec.metadata`.

    Attributes:
        pattern: Pattern string for the field, if the format pins one
            (presumably a regular expression; not established in this
            module).
        default: Fallback string; ``FormatRegistry.build`` uses it when
            ``pattern`` is unset or empty.
        description: Free-text description of the field.
    """

    pattern: str | None = Field(default=None)
    default: str | None = Field(default=None)
    description: str | None = Field(default=None)
[docs]
class FormatVersionSpec(BaseModel):
    """A single version of a format (e.g. RINEX v3, PRODUCT v1).

    Keys that are not declared below are kept on the model rather than
    rejected (``extra="allow"``).

    Attributes:
        description: Optional free-text description of the version.
        notes: Optional free-text notes.
        metadata: Metadata field name → field definition. A ``None``
            value means the field is declared without properties of its
            own.
        file_templates: Variant name → filename template string.
        compression: Compression identifiers listed for this version.
    """

    model_config = ConfigDict(extra="allow")

    description: str | None = None
    notes: str | None = None
    metadata: dict[str, FormatFieldDef | None] = Field(default_factory=dict)
    file_templates: dict[str, str] = Field(default_factory=dict)
    compression: list[str] = Field(default_factory=list)

    @field_validator("file_templates", mode="before")
    @classmethod
    def _unwrap_lists(cls, v: object) -> object:
        """Accept both ``str`` and ``[str]`` template values from YAML.

        Runs *before* Pydantic's own validation, so ``v`` is the raw input
        and is not guaranteed to be a mapping.

        Args:
            v: Raw ``file_templates`` value.

        Returns:
            A ``dict`` in which every list value has been replaced by its
            first element (or ``""`` for an empty list). ``None`` becomes
            an empty dict. Any other non-mapping input is returned
            unchanged so that Pydantic reports a regular validation error.
        """
        if v is None:
            # A bare ``file_templates:`` key in YAML loads as None.
            return {}

        items = getattr(v, "items", None)
        if not callable(items):
            # Not mapping-like: let the field's own validation reject it
            # instead of failing here with an AttributeError.
            return v

        unwrapped: dict = {}
        for key, val in items():
            if isinstance(val, list):
                # Only the first entry is used; additional entries are ignored.
                unwrapped[key] = val[0] if val else ""
            else:
                unwrapped[key] = val
        return unwrapped
[docs]
class FormatSpec(BaseModel):
    """Top-level definition of one format (e.g. RINEX, PRODUCT, TABLE).

    Holds the format's description together with a mapping of *version* →
    :class:`FormatVersionSpec`. Each version, in turn, carries the
    variant-name → filename-template mapping and the metadata field
    definitions for that version.

    .. note::
        This is the *format-library* model. For the *product-facing*
        model that binds a specific format+version+variant to parameter
        lists and filename templates, see
        :class:`~gnss_product_management.specifications.format.format_spec.FormatVariantSpec`.

    Attributes:
        description: Free-text description; empty string when omitted.
        versions: Version identifier → :class:`FormatVersionSpec`.
        compression: Compression identifiers listed at format level.
    """

    description: str = Field(default="")
    versions: dict[str, FormatVersionSpec] = Field(default_factory=dict)
    compression: list[str] = Field(default_factory=list)
[docs]
class FormatSpecCollection(BaseModel):
    """Collection of format specifications from the ``formats:`` YAML key.

    Attributes:
        formats: Format name → :class:`FormatSpec`.
    """

    formats: dict[str, FormatSpec] = Field(default_factory=dict)

    @classmethod
    def from_yaml(cls, path: str | Path) -> FormatSpecCollection:
        """Load a collection from a YAML file.

        Accepts two layouts:

        1. **Wrapped** — a top-level ``formats:`` key whose value is a
           mapping of format name → :class:`FormatSpec`-compatible dict.
        2. **Flat** (``format_spec.yaml`` convention) — format names are
           top-level keys; each entry has ``versions → variants →
           {parameters, filename}`` which are converted to the
           ``metadata`` / ``file_templates`` expected by
           :class:`FormatVersionSpec`.

        In the flat layout, entries that do not have the expected shape
        (a format without ``versions``, a version without ``variants``, a
        non-mapping variant or parameter) are skipped.

        Args:
            path: Path to the YAML file.

        Returns:
            A :class:`FormatSpecCollection` instance. An empty YAML
            document yields an empty collection.

        Raises:
            ValueError: If the top level of the document is not a mapping.
        """
        # Fixed encoding so parsing does not depend on the platform locale.
        with open(path, encoding="utf-8") as fh:
            raw = yaml.safe_load(fh)

        if raw is None:
            # safe_load returns None for an empty document.
            return cls()
        if not isinstance(raw, dict):
            raise ValueError(
                f"Expected a mapping at the top level of {str(path)!r}, "
                f"got {type(raw).__name__}"
            )

        # Layout 1: explicit `formats:` wrapper
        if "formats" in raw:
            wrapped = raw["formats"]
            # A bare `formats:` key loads as None; treat it as "no formats".
            return cls.model_validate({"formats": {} if wrapped is None else wrapped})

        # Layout 2: flat — convert variants/filename → metadata/file_templates
        formats: dict[str, FormatSpec] = {}
        for fmt_name, fmt_data in raw.items():
            if not isinstance(fmt_data, dict) or "versions" not in fmt_data:
                continue
            versions = {
                # YAML may parse version keys such as `3` as numbers.
                str(ver_name): cls._version_from_variants(ver_data)
                for ver_name, ver_data in (fmt_data.get("versions") or {}).items()
                if isinstance(ver_data, dict) and "variants" in ver_data
            }
            formats[str(fmt_name)] = FormatSpec(versions=versions)
        return cls(formats=formats)

    @staticmethod
    def _version_from_variants(ver_data: dict) -> FormatVersionSpec:
        """Fold one flat-layout version's ``variants`` into a :class:`FormatVersionSpec`."""
        metadata: dict[str, FormatFieldDef | None] = {}
        file_templates: dict[str, str] = {}
        for variant_name, variant_data in (ver_data.get("variants") or {}).items():
            if not isinstance(variant_data, dict):
                continue
            if filename := variant_data.get("filename"):
                file_templates[variant_name] = filename
            for param in variant_data.get("parameters") or []:
                if not isinstance(param, dict):
                    continue
                pname = param.get("name")
                # The first variant that declares a parameter wins.
                if not pname or pname in metadata:
                    continue
                pattern = param.get("pattern")
                metadata[pname] = FormatFieldDef(pattern=pattern) if pattern else None
        return FormatVersionSpec(metadata=metadata, file_templates=file_templates)
[docs]
class FormatRegistry(BaseModel):
    """Registry of reusable format definitions, intended to be read-only.

    Built from a :class:`FormatSpecCollection` by resolving every metadata
    field's default value against a :class:`ParameterCatalog`. Immutability
    is a convention only; the model is not declared frozen.

    Attributes:
        formats: Mapping of format name to resolved :class:`FormatSpec`.
    """

    formats: dict[str, FormatSpec] = Field(default_factory=dict)

    def get_format(self, name: str) -> FormatSpec:
        """Retrieve a format by name.

        Args:
            name: Format name.

        Returns:
            The matching :class:`FormatSpec`.

        Raises:
            KeyError: If *name* is not registered.
        """
        try:
            return self.formats[name]
        except KeyError:
            # `from None` drops the redundant inner KeyError from the traceback.
            raise KeyError(
                f"Format {name!r} not found. Available: {sorted(self.formats)}"
            ) from None

    def get_version(self, format_name: str, version: str) -> FormatVersionSpec:
        """Retrieve a specific version of a format.

        Args:
            format_name: Format name.
            version: Version identifier.

        Returns:
            The matching :class:`FormatVersionSpec`.

        Raises:
            KeyError: If the format or version is not found.
        """
        fmt = self.get_format(format_name)
        try:
            return fmt.versions[version]
        except KeyError:
            raise KeyError(
                f"Version {version!r} not found in format {format_name!r}. "
                f"Available: {sorted(fmt.versions)}"
            ) from None

    @classmethod
    def build(
        cls, format_spec: FormatSpecCollection, metadata_catalog: ParameterCatalog
    ) -> FormatRegistry:
        """Build a :class:`FormatRegistry` by resolving metadata field defaults.

        For every format version, each metadata field is resolved to a
        single pattern: the field's own ``pattern``, else its ``default``,
        else the ``pattern`` of the same-named entry in *metadata_catalog*.
        Every ``{placeholder}`` in the version's file templates must then
        name one of the resolved metadata fields.

        Args:
            format_spec: Raw format specification collection.
            metadata_catalog: Global parameter catalog for field defaults.

        Returns:
            A :class:`FormatRegistry` with all fields resolved.

        Raises:
            AssertionError: If a field is missing both a pattern and
                a catalog entry.
            ValueError: If a file template placeholder has no
                corresponding metadata field.
        """
        placeholder_re = re.compile(r"\{(.*?)\}")
        resolved_formats: dict[str, FormatSpec] = {}

        for format_name, format_entry in format_spec.formats.items():
            resolved_versions: dict[str, FormatVersionSpec] = {}
            for version_name, version_spec in format_entry.versions.items():
                if version_spec.metadata is None:
                    # Defensive: a version without a metadata mapping is left out.
                    continue
                resolved_metadata = cls._resolve_metadata(
                    format_name, version_name, version_spec, metadata_catalog
                )
                cls._check_templates(
                    format_name,
                    version_name,
                    version_spec,
                    resolved_metadata,
                    placeholder_re,
                )
                # NOTE(review): only the declared fields are carried over; any
                # extra keys kept by FormatVersionSpec (extra="allow") are not.
                resolved_versions[version_name] = FormatVersionSpec(
                    description=version_spec.description,
                    notes=version_spec.notes,
                    metadata=resolved_metadata,
                    file_templates=version_spec.file_templates,
                    compression=version_spec.compression,
                )
            resolved_formats[format_name] = FormatSpec(
                description=format_entry.description,
                versions=resolved_versions,
                compression=format_entry.compression,
            )
        return cls(formats=resolved_formats)

    @staticmethod
    def _resolve_metadata(
        format_name: str,
        version_name: str,
        version_spec: FormatVersionSpec,
        metadata_catalog: ParameterCatalog,
    ) -> dict[str, FormatFieldDef]:
        """Resolve each metadata field of one version to a concrete pattern."""
        resolved: dict[str, FormatFieldDef] = {}
        for field_name, field_def in version_spec.metadata.items():
            pattern = default = description = None
            if field_def is not None:
                pattern = field_def.pattern
                default = field_def.default
                description = field_def.description

            field_default = pattern or default
            if field_default is None:
                # Raised explicitly rather than via `assert`, so the check
                # still runs when Python is started with -O.
                if field_name not in metadata_catalog.parameters:
                    raise AssertionError(
                        f"Field {field_name!r} not found in parameter catalog "
                        f"for format {format_name!r} version {version_name!r}"
                    )
                field_default = metadata_catalog.parameters[field_name].pattern

            resolved[field_name] = FormatFieldDef(
                pattern=field_default,
                description=description,
            )
        return resolved

    @staticmethod
    def _check_templates(
        format_name: str,
        version_name: str,
        version_spec: FormatVersionSpec,
        resolved_metadata: dict[str, FormatFieldDef],
        placeholder_re: re.Pattern[str],
    ) -> None:
        """Ensure every template placeholder names a resolved metadata field."""
        for variant_name, file_template in version_spec.file_templates.items():
            for placeholder in placeholder_re.findall(file_template):
                if placeholder not in resolved_metadata:
                    raise ValueError(
                        f"Placeholder {placeholder!r} in file template "
                        f"{file_template!r} for format {format_name!r} "
                        f"version {version_name!r} variant {variant_name!r} "
                        f"does not have a corresponding metadata field."
                    )