Source code for biosample_enricher.land.models

"""Data models for land cover and vegetation enrichment."""

from datetime import date
from typing import Any

from pydantic import BaseModel, Field, field_validator


class LandCoverObservation(BaseModel):
    """One land cover classification returned by a single data provider.

    The model records which provider produced the value, where and when the
    underlying data actually applies relative to the request, the
    classification itself, and optional quality information.
    """

    # --- Provenance: who supplied the data and how far it is from the request
    provider: str = Field(..., description="Data provider name")
    actual_location: dict[str, float] = Field(
        ...,
        description="Actual pixel/data location",
    )
    distance_m: float = Field(
        ...,
        ge=0.0,
        description="Distance from requested location (meters)",
    )
    actual_date: date | None = Field(
        default=None,
        description="Date of land cover data",
    )
    temporal_offset_days: int | None = Field(
        default=None,
        description="Days offset from requested date",
    )

    # --- Classification data
    class_code: str | None = Field(
        default=None,
        description="Raw classification code",
    )
    class_label: str | None = Field(
        default=None,
        description="Human-readable class label",
    )
    classification_system: str | None = Field(
        default=None,
        description="Classification scheme name",
    )

    # --- Quality metrics
    confidence: float | None = Field(
        default=None,
        ge=0.0,
        le=1.0,
        description="Confidence score",
    )
    resolution_m: float | None = Field(
        default=None,
        gt=0.0,
        description="Spatial resolution (meters)",
    )

    # --- Additional metadata
    dataset_version: str | None = Field(
        default=None,
        description="Dataset version/year",
    )
    quality_flags: list[str] = Field(
        default_factory=list,
        description="Quality control flags",
    )

    @field_validator("actual_location")
    @classmethod
    def validate_location(cls, v: dict[str, float]) -> dict[str, float]:
        """Check that ``actual_location`` holds a valid lat/lon pair.

        Args:
            v: Mapping expected to contain ``"lat"`` and ``"lon"`` entries.

        Returns:
            The same mapping, unchanged, when it passes every check.

        Raises:
            ValueError: If either key is missing, if the latitude is outside
                [-90, 90], or if the longitude is outside [-180, 180].
        """
        if not ("lat" in v and "lon" in v):
            raise ValueError("Location must contain 'lat' and 'lon' keys")

        latitude = v["lat"]
        longitude = v["lon"]

        # Chained comparisons are negated (rather than testing "< low or
        # > high") so a NaN, which fails every comparison, is rejected too.
        if not -90 <= latitude <= 90:
            raise ValueError(f"Latitude must be between -90 and 90, got {latitude}")
        if not -180 <= longitude <= 180:
            raise ValueError(
                f"Longitude must be between -180 and 180, got {longitude}"
            )
        return v
class VegetationObservation(BaseModel):
    """One set of vegetation indices returned by a single data provider.

    Alongside the index values (NDVI, EVI, LAI, FPAR) the model records which
    provider produced them, where and when the underlying data actually
    applies relative to the request, and optional quality information.
    """

    # --- Provenance: who supplied the data and how far it is from the request
    provider: str = Field(..., description="Data provider name")
    actual_location: dict[str, float] = Field(
        ...,
        description="Actual pixel/data location",
    )
    distance_m: float = Field(
        ...,
        ge=0.0,
        description="Distance from requested location (meters)",
    )
    actual_date: date | None = Field(
        default=None,
        description="Date of vegetation data",
    )
    temporal_offset_days: int | None = Field(
        default=None,
        description="Days offset from requested date",
    )

    # --- Vegetation indices
    ndvi: float | None = Field(
        default=None,
        ge=-1.0,
        le=1.0,
        description="Normalized Difference Vegetation Index",
    )
    evi: float | None = Field(
        default=None,
        ge=-1.0,
        le=1.0,
        description="Enhanced Vegetation Index",
    )
    lai: float | None = Field(
        default=None,
        ge=0.0,
        description="Leaf Area Index",
    )
    fpar: float | None = Field(
        default=None,
        ge=0.0,
        le=1.0,
        description="Fraction of Photosynthetically Active Radiation",
    )

    # --- Quality metrics
    confidence: float | None = Field(
        default=None,
        ge=0.0,
        le=1.0,
        description="Confidence score",
    )
    resolution_m: float | None = Field(
        default=None,
        gt=0.0,
        description="Spatial resolution (meters)",
    )

    # --- Additional metadata
    composite_period: str | None = Field(
        default=None,
        description="Temporal composite period (e.g., '16-day')",
    )
    quality_flags: list[str] = Field(
        default_factory=list,
        description="Quality control flags",
    )

    @field_validator("actual_location")
    @classmethod
    def validate_location(cls, v: dict[str, float]) -> dict[str, float]:
        """Check that ``actual_location`` holds a valid lat/lon pair.

        Args:
            v: Mapping expected to contain ``"lat"`` and ``"lon"`` entries.

        Returns:
            The same mapping, unchanged, when it passes every check.

        Raises:
            ValueError: If either key is missing, if the latitude is outside
                [-90, 90], or if the longitude is outside [-180, 180].
        """
        has_missing_key = any(name not in v for name in ("lat", "lon"))
        if has_missing_key:
            raise ValueError("Location must contain 'lat' and 'lon' keys")

        latitude, longitude = v["lat"], v["lon"]

        # The in-range test is computed first and then negated so that a NaN,
        # which fails every comparison, is treated as out of range.
        latitude_ok = -90 <= latitude <= 90
        if not latitude_ok:
            raise ValueError(f"Latitude must be between -90 and 90, got {latitude}")

        longitude_ok = -180 <= longitude <= 180
        if not longitude_ok:
            raise ValueError(
                f"Longitude must be between -180 and 180, got {longitude}"
            )
        return v
class LandResult(BaseModel):
    """Complete land cover and vegetation enrichment result.

    Bundles the original request (location and optional date), every land
    cover and vegetation observation collected from the providers, an
    aggregate quality score, and bookkeeping about which providers were
    tried, which succeeded, and any errors or warnings raised on the way.
    """

    # Request metadata
    requested_location: dict[str, float] = Field(
        ..., description="Originally requested coordinates"
    )
    requested_date: date | None = Field(None, description="Originally requested date")

    # Land cover results from all providers
    land_cover: list[LandCoverObservation] = Field(
        default_factory=list, description="Land cover classifications"
    )

    # Vegetation index results from all providers
    vegetation: list[VegetationObservation] = Field(
        default_factory=list, description="Vegetation indices"
    )

    # Overall quality metrics
    overall_quality_score: float = Field(
        ..., ge=0.0, le=1.0, description="Aggregate quality score"
    )
    providers_attempted: list[str] = Field(
        default_factory=list, description="All providers attempted"
    )
    providers_successful: list[str] = Field(
        default_factory=list, description="Providers that returned data"
    )

    # Error tracking
    errors: list[str] = Field(
        default_factory=list, description="Error messages from failed providers"
    )
    warnings: list[str] = Field(default_factory=list, description="Warning messages")

    @field_validator("requested_location")
    @classmethod
    def validate_requested_location(cls, v: dict[str, float]) -> dict[str, float]:
        """Check that ``requested_location`` holds a valid lat/lon pair.

        Args:
            v: Mapping expected to contain ``"lat"`` and ``"lon"`` entries.

        Returns:
            The same mapping, unchanged, when it passes every check.

        Raises:
            ValueError: If either key is missing, if the latitude is outside
                [-90, 90], or if the longitude is outside [-180, 180].
        """
        if "lat" not in v or "lon" not in v:
            raise ValueError("Requested location must contain 'lat' and 'lon' keys")

        lat, lon = v["lat"], v["lon"]
        # Negated chained comparisons also reject NaN, which fails every
        # comparison and would slip through a "< low or > high" test.
        if not (-90 <= lat <= 90):
            raise ValueError(f"Latitude must be between -90 and 90, got {lat}")
        if not (-180 <= lon <= 180):
            raise ValueError(f"Longitude must be between -180 and 180, got {lon}")
        return v

    def to_nmdc_schema(self) -> dict[str, Any]:
        """Convert to NMDC schema format.

        Returns:
            A dict that may contain:

            * ``"cur_vegetation"`` - an ``nmdc:TextValue`` holding the class
              label of the highest-confidence land cover observation that
              actually has a label (a missing confidence counts as 0.0).
            * ``"ndvi"``, ``"evi"``, ``"lai"`` - ``nmdc:QuantityValue``
              entries taken from the vegetation observation with the smallest
              absolute ``temporal_offset_days``; when no observation has an
              offset, the first observation is used. Indices that are
              ``None`` on that observation are omitted.

            The dict is empty when there is nothing to report.
        """
        nmdc_data: dict[str, Any] = {}

        # Only labelled observations can supply "cur_vegetation". Filtering
        # before picking the maximum means an unlabelled high-confidence
        # observation no longer hides a labelled one with lower confidence.
        labelled = [lc for lc in self.land_cover if lc.class_label]
        if labelled:
            best_lc = max(labelled, key=lambda obs: obs.confidence or 0.0)
            nmdc_data["cur_vegetation"] = {
                "has_raw_value": best_lc.class_label,
                "type": "nmdc:TextValue",
            }

        if self.vegetation:
            # Prefer the temporally closest observation; fall back to the
            # first one when none of them carries a temporal offset.
            dated = [
                veg for veg in self.vegetation if veg.temporal_offset_days is not None
            ]
            best_veg = min(
                dated,
                key=lambda obs: abs(obs.temporal_offset_days),
                default=self.vegetation[0],
            )

            # (output key, value, unit) in the order the keys are emitted.
            indices = (
                ("ndvi", best_veg.ndvi, "1"),
                ("evi", best_veg.evi, "1"),
                ("lai", best_veg.lai, "m^2/m^2"),
            )
            for key, value, unit in indices:
                if value is not None:
                    nmdc_data[key] = {
                        "has_numeric_value": value,
                        "has_unit": unit,
                        "type": "nmdc:QuantityValue",
                    }

        return nmdc_data

    def to_gold_schema(self) -> dict[str, Any]:
        """Convert to GOLD schema format.

        Returns:
            A dict that may contain:

            * ``"habitatDetails"`` - a ``"; "``-joined string with one
              ``"Land cover: <label> (<provider>)"`` entry per land cover
              observation that has a class label.
            * ``"environmentalParameters"`` - a dict mapping
              ``"<provider>_<index>"`` to each non-``None`` NDVI, EVI, LAI
              and FPAR value, where ``<provider>`` is the provider name
              lower-cased with spaces replaced by underscores.

            Either key is omitted when it would be empty.
        """
        gold_data: dict[str, Any] = {}

        # One habitat detail entry per labelled land cover observation.
        habitat_details: list[str] = []
        for lc in self.land_cover:
            if not lc.class_label:
                continue
            detail = f"Land cover: {lc.class_label}"
            if lc.provider:
                detail += f" ({lc.provider})"
            habitat_details.append(detail)

        # Flatten every provider's indices into provider-prefixed keys.
        # NOTE(review): two observations whose providers normalise to the
        # same prefix overwrite each other here; confirm that is intended.
        env_params: dict[str, float] = {}
        for veg in self.vegetation:
            provider_prefix = veg.provider.lower().replace(" ", "_")
            for index_name in ("ndvi", "evi", "lai", "fpar"):
                value = getattr(veg, index_name)
                if value is not None:
                    env_params[f"{provider_prefix}_{index_name}"] = value

        if habitat_details:
            gold_data["habitatDetails"] = "; ".join(habitat_details)
        if env_params:
            gold_data["environmentalParameters"] = env_params

        return gold_data