Source code for biosample_enricher.soil.models

"""Pydantic models for soil enrichment data."""

from datetime import datetime
from typing import Any

from pydantic import BaseModel, Field, field_validator


class SoilObservation(BaseModel):
    """Individual soil measurement or prediction at a location.

    Every field is optional; a provider fills in whichever properties it
    can supply for a given depth interval. Numeric fields carry range
    constraints (``ge``/``le``) that pydantic enforces on construction.
    """

    # Soil Classification
    classification_usda: str | None = Field(
        None,
        description="USDA Soil Taxonomy classification (e.g., 'Typic Haplocryepts')",
    )
    classification_wrb: str | None = Field(
        None, description="WRB soil classification (e.g., 'Cambisols')"
    )
    confidence_usda: float | None = Field(
        None, ge=0.0, le=1.0, description="Confidence in USDA classification (0-1)"
    )
    confidence_wrb: float | None = Field(
        None, ge=0.0, le=1.0, description="Confidence in WRB classification (0-1)"
    )

    # Physical Properties
    ph_h2o: float | None = Field(
        None, ge=0.0, le=14.0, description="Soil pH in water (0-14 scale)"
    )
    organic_carbon: float | None = Field(
        None, ge=0.0, description="Soil organic carbon content (g/kg)"
    )
    bulk_density: float | None = Field(
        None, ge=0.0, description="Soil bulk density (g/cm³)"
    )

    # Texture Properties
    sand_percent: float | None = Field(
        None, ge=0.0, le=100.0, description="Sand content percentage"
    )
    silt_percent: float | None = Field(
        None, ge=0.0, le=100.0, description="Silt content percentage"
    )
    clay_percent: float | None = Field(
        None, ge=0.0, le=100.0, description="Clay content percentage"
    )
    texture_class: str | None = Field(
        None, description="USDA texture class (e.g., 'Loam', 'Sandy clay loam')"
    )

    # Chemical Properties
    total_nitrogen: float | None = Field(
        None, ge=0.0, description="Total nitrogen content (g/kg)"
    )
    available_phosphorus: float | None = Field(
        None, ge=0.0, description="Available phosphorus content (mg/kg)"
    )
    cation_exchange_capacity: float | None = Field(
        None, ge=0.0, description="Cation exchange capacity (cmol(c)/kg)"
    )

    # Measurement Context
    depth_cm: str | None = Field(
        None, description="Depth interval (e.g., '0-5cm', '5-15cm')"
    )
    measurement_method: str | None = Field(
        None, description="Measurement or prediction method"
    )

    @field_validator("texture_class")
    @classmethod
    def validate_texture_class(cls, v: str | None) -> str | None:
        """Validate and normalise a USDA texture class name.

        Args:
            v: Raw texture class value, or ``None`` when not provided.

        Returns:
            ``None`` if *v* is ``None``; otherwise the canonical class name
            (first letter upper-case, the rest lower-case, single spaces).

        Raises:
            ValueError: If *v* does not match any of the twelve class names
                after case and whitespace normalisation.
        """
        if v is None:
            return None

        valid_classes = {
            "Sand",
            "Loamy sand",
            "Sandy loam",
            "Loam",
            "Silt loam",
            "Silt",
            "Sandy clay loam",
            "Clay loam",
            "Silty clay loam",
            "Sandy clay",
            "Silty clay",
            "Clay",
        }
        if v in valid_classes:
            return v

        # Allow for variations in casing/formatting. The canonical names are
        # sentence-cased ("Sandy clay loam"), so capitalize() is the right
        # normaliser; title() would produce "Sandy Clay Loam", which never
        # matches a multi-word class. split()/join collapses stray whitespace.
        normalized = " ".join(v.split()).capitalize()
        if normalized not in valid_classes:
            raise ValueError(
                f"Invalid texture class: {v}. Must be one of {valid_classes}"
            )
        return normalized
class SoilResult(BaseModel):
    """Results from soil enrichment for a specific location.

    Bundles the observations returned by one provider with the location
    they refer to, a quality score, and any errors or warnings collected
    during retrieval. Also converts the surface observation into
    NMDC- and GOLD-shaped dictionaries.
    """

    # Location Context
    latitude: float = Field(description="Latitude of measurement location")
    longitude: float = Field(description="Longitude of measurement location")
    distance_m: float | None = Field(
        None,
        ge=0.0,
        description="Distance from requested location to measurement (meters)",
    )

    # Soil Data
    observations: list[SoilObservation] = Field(
        default_factory=list, description="Soil observations at different depths"
    )

    # Data Quality
    quality_score: float = Field(
        ge=0.0, le=1.0, description="Overall data quality score (0-1)"
    )
    provider: str = Field(description="Data source provider")
    # NOTE(review): datetime.utcnow() yields a naive timestamp and is
    # deprecated since Python 3.12. Left unchanged because switching to an
    # aware datetime would alter serialized output - confirm with consumers.
    retrieved_at: datetime = Field(
        default_factory=datetime.utcnow, description="When the data was retrieved"
    )

    # Error Information
    errors: list[str] = Field(
        default_factory=list, description="Any errors encountered during retrieval"
    )
    warnings: list[str] = Field(
        default_factory=list, description="Any warnings about data quality"
    )

    def to_nmdc_schema(self) -> dict[str, Any]:
        """Convert to NMDC biosample schema format.

        Returns:
            A dict built from the surface observation. It may contain
            ``soil_type``, ``ph`` and ``soil_texture_meth``, and always
            contains the ``_soil_enrichment_*`` provenance keys. The dict
            is empty when there are no observations.
        """
        result: dict[str, Any] = {}

        # Get surface soil observation (0-5cm or first available)
        surface_obs = self._get_surface_observation()
        if surface_obs is None:
            return result

        # Soil type mapping: join whichever classifications are present,
        # each tagged with its taxonomy.
        if surface_obs.classification_usda or surface_obs.classification_wrb:
            classifications = []
            if surface_obs.classification_usda:
                classifications.append(f"{surface_obs.classification_usda} [USDA]")
            if surface_obs.classification_wrb:
                classifications.append(f"{surface_obs.classification_wrb} [WRB]")

            result["soil_type"] = {
                "has_raw_value": " / ".join(classifications),
                "type": "nmdc:TextValue",
            }

        # pH mapping
        if surface_obs.ph_h2o is not None:
            result["ph"] = {
                "has_numeric_value": surface_obs.ph_h2o,
                "has_unit": "pH",
                "type": "nmdc:QuantityValue",
            }

        # Texture method mapping
        if surface_obs.texture_class:
            result["soil_texture_meth"] = (
                f"USDA texture classification: {surface_obs.texture_class}"
            )

        # Add source information
        result["_soil_enrichment_source"] = self.provider
        result["_soil_enrichment_quality"] = float(self.quality_score)

        return result

    def to_gold_schema(self) -> dict[str, Any]:
        """Convert to GOLD biosample schema format.

        Returns:
            A dict built from the surface observation. It may contain
            ``habitatDetails`` (a "; "-joined summary string) and
            ``environmentalParameters`` (numeric values keyed by name).
            The dict is empty when there are no observations.
        """
        result: dict[str, Any] = {}

        surface_obs = self._get_surface_observation()
        if surface_obs is None:
            return result

        # Add to habitat description
        habitat_details = []
        if surface_obs.classification_usda:
            habitat_details.append(f"Soil: {surface_obs.classification_usda}")
        if surface_obs.texture_class:
            habitat_details.append(f"Texture: {surface_obs.texture_class}")
        if surface_obs.ph_h2o is not None:
            habitat_details.append(f"pH: {surface_obs.ph_h2o}")

        if habitat_details:
            result["habitatDetails"] = "; ".join(habitat_details)

        # Environmental parameters
        env_params: dict[str, float] = {}
        if surface_obs.ph_h2o is not None:
            env_params["soil_ph"] = surface_obs.ph_h2o
        if surface_obs.organic_carbon is not None:
            env_params["soil_organic_carbon_g_kg"] = surface_obs.organic_carbon
        if surface_obs.total_nitrogen is not None:
            env_params["soil_total_nitrogen_g_kg"] = surface_obs.total_nitrogen

        if env_params:
            result["environmentalParameters"] = env_params

        return result

    @staticmethod
    def _is_surface_depth(depth_label: str | None) -> bool:
        """Return True when *depth_label* names the 0-5 interval.

        A bare substring test for "0-5" would also accept deeper intervals
        such as "10-50cm" or "30-50cm", so an occurrence only counts when
        it is not directly preceded or followed by another digit.
        """
        if not depth_label:
            return False

        marker = "0-5"
        start = depth_label.find(marker)
        while start != -1:
            end = start + len(marker)
            before = depth_label[start - 1] if start > 0 else ""
            after = depth_label[end:end + 1]
            # "".isdigit() is False, so string edges count as boundaries.
            if not before.isdigit() and not after.isdigit():
                return True
            start = depth_label.find(marker, start + 1)
        return False

    def _get_surface_observation(self) -> SoilObservation | None:
        """Get the surface soil observation (0-5cm preferred).

        Returns:
            The first observation whose ``depth_cm`` names the 0-5
            interval, else the first observation, else ``None`` when there
            are no observations.
        """
        if not self.observations:
            return None

        # Prefer 0-5cm depth if available
        for obs in self.observations:
            if self._is_surface_depth(obs.depth_cm):
                return obs

        # Return first observation as fallback
        return self.observations[0]
# USDA Texture Classification Constants
#
# Maps each texture class name to per-fraction tuples.
# NOTE(review): the tuples look like (lower, upper) percentage bounds for
# the named particle-size fraction - confirm against the consumers of
# this table.
USDA_TEXTURE_CLASSES: dict[str, dict[str, tuple[int, int]]] = {
    # Coarse (sand-dominated) classes
    "Sand": {"sand": (85, 100), "clay": (0, 10)},
    "Loamy sand": {"sand": (70, 85), "clay": (0, 15)},
    "Sandy loam": {"sand": (50, 70), "clay": (0, 20)},
    # Medium classes
    "Loam": {"sand": (23, 52), "clay": (7, 27), "silt": (28, 50)},
    "Silt loam": {"sand": (0, 50), "clay": (0, 27), "silt": (50, 88)},
    "Silt": {"sand": (0, 20), "clay": (0, 12), "silt": (80, 100)},
    # Moderately fine classes
    "Sandy clay loam": {"sand": (45, 80), "clay": (20, 35)},
    "Clay loam": {"sand": (20, 45), "clay": (27, 40)},
    "Silty clay loam": {"sand": (0, 20), "clay": (27, 40)},
    # Fine (clay-dominated) classes
    "Sandy clay": {"sand": (45, 65), "clay": (35, 55)},
    "Silty clay": {"sand": (0, 20), "clay": (40, 60)},
    "Clay": {"sand": (0, 45), "clay": (40, 100)},
}
def classify_texture(sand_pct: float, silt_pct: float, clay_pct: float) -> str:
    """Classify soil texture using USDA texture triangle.

    The three fractions are rescaled to sum to exactly 100 and then run
    through a decision tree that follows the USDA Soil Survey Manual class
    definitions. The tree is nested rather than a flat list of independent
    rules, so every point of the triangle maps to exactly one class even
    when rounding puts a value right on a boundary.

    Args:
        sand_pct: Sand percentage (0-100)
        silt_pct: Silt percentage (0-100)
        clay_pct: Clay percentage (0-100)

    Returns:
        USDA texture class name

    Raises:
        ValueError: If percentages don't sum to ~100% or are invalid
    """
    # Validate inputs
    total = sand_pct + silt_pct + clay_pct
    if not (95 <= total <= 105):  # Allow for small rounding errors
        raise ValueError(f"Sand + silt + clay must sum to ~100%, got {total}%")

    if any(pct < 0 or pct > 100 for pct in [sand_pct, silt_pct, clay_pct]):
        raise ValueError("All percentages must be between 0 and 100")

    # Normalize to exactly 100%
    factor = 100.0 / total
    sand = sand_pct * factor
    silt = silt_pct * factor
    clay = clay_pct * factor

    # --- Fine textures: clay >= 40 ---
    if clay >= 40:
        if sand >= 45:
            return "Sandy clay"
        if silt >= 40:
            return "Silty clay"
        return "Clay"

    # --- Moderately fine textures: 27 <= clay < 40 ---
    if clay >= 27:
        if sand >= 45:
            # Sandy clay starts at 35% clay; below that it is a loam.
            return "Sandy clay" if clay >= 35 else "Sandy clay loam"
        # Split on sand, not silt: a silt-based split misfiles low-silt
        # samples such as (44, 17, 39) as silty clay loam.
        if sand > 20:
            return "Clay loam"
        return "Silty clay loam"

    # --- clay < 27 from here on ---
    # Sandy clay loam reaches down to 20% clay, but only when silt is low.
    if clay >= 20 and sand >= 45 and silt < 28:
        return "Sandy clay loam"

    # Silt-dominated classes; "Silt" additionally requires clay < 12.
    if silt >= 80 and clay < 12:
        return "Silt"
    if silt >= 50:
        return "Silt loam"

    # Sand and loamy sand are bounded by sloped lines on the triangle,
    # expressed as weighted sums of silt and clay.
    if silt + 1.5 * clay < 15:
        return "Sand"
    if silt + 2.0 * clay < 30:
        return "Loamy sand"

    # Loam: 7-27% clay, 28-50% silt, at most 52% sand. The upper clay and
    # silt bounds are already guaranteed by the branches above.
    if clay >= 7 and silt >= 28 and sand <= 52:
        return "Loam"

    return "Sandy loam"