Source code for biosample_enricher.osm_features.models

"""Data models for OpenStreetMap geographic features."""

from datetime import datetime, timezone
from enum import Enum
from typing import Any

from pydantic import BaseModel, Field


class OSMElementType(str, Enum):
    """Kinds of OpenStreetMap elements.

    Members are ``str`` subclasses, so each one compares equal to its
    raw string value (e.g. ``OSMElementType.NODE == "node"``).
    """

    NODE = "node"
    WAY = "way"
    RELATION = "relation"
class GeometryType(str, Enum):
    """Geometric representations a feature can have.

    Members are ``str`` subclasses and compare equal to their raw value.
    """

    POINT = "point"
    LINESTRING = "linestring"
    POLYGON = "polygon"
    MULTIPOLYGON = "multipolygon"
class FeatureCategory(str, Enum):
    """Top-level categories used to classify geographic features.

    Members are ``str`` subclasses and compare equal to their raw value.
    ``OTHER`` is the fallback category.
    """

    NATURAL = "natural"
    WATERWAY = "waterway"
    HIGHWAY = "highway"
    RAILWAY = "railway"
    AEROWAY = "aeroway"
    AMENITY = "amenity"
    LEISURE = "leisure"
    LANDUSE = "landuse"
    BUILDING = "building"
    BOUNDARY = "boundary"
    PLACE = "place"
    TOURISM = "tourism"
    SHOP = "shop"
    CRAFT = "craft"
    OFFICE = "office"
    # Catch-all for anything that does not map to a category above.
    OTHER = "other"
class Coordinates(BaseModel):
    """A latitude/longitude pair.

    Validation bounds: latitude must lie in [-90, 90] and longitude in
    [-180, 180] (both inclusive).
    """

    latitude: float = Field(
        description="Latitude coordinate",
        ge=-90,
        le=90,
    )
    longitude: float = Field(
        description="Longitude coordinate",
        ge=-180,
        le=180,
    )
class OSMNamedFeature(BaseModel):
    """One named geographic feature taken from OpenStreetMap.

    Only ``osm_type`` and ``osm_id`` are required; every other field has
    a default (``None``, an empty container, or ``FeatureCategory.OTHER``).
    """

    # --- OSM identity -----------------------------------------------------
    osm_type: OSMElementType = Field(description="Type of OSM element")
    osm_id: int = Field(description="OSM element ID")

    # --- Naming and external references -----------------------------------
    name: str | None = Field(
        description="Primary name of the feature",
        default=None,
    )
    alt_names: list[str] = Field(
        description="Alternative names",
        default_factory=list,
    )
    wikidata_id: str | None = Field(
        description="Wikidata identifier",
        default=None,
    )
    wikipedia: str | None = Field(
        description="Wikipedia reference",
        default=None,
    )

    # --- Geometry ---------------------------------------------------------
    centroid: Coordinates | None = Field(
        description="Center point of the feature",
        default=None,
    )
    # Must be non-negative when provided.
    distance_km: float | None = Field(
        description="Distance from query point in kilometers",
        default=None,
        ge=0.0,
    )
    geometry_type: GeometryType | None = Field(
        description="Type of geometry",
        default=None,
    )

    # --- Classification ---------------------------------------------------
    category: FeatureCategory = Field(
        description="Main feature category",
        default=FeatureCategory.OTHER,
    )
    subcategory: str | None = Field(
        description="Specific feature type within category",
        default=None,
    )

    # --- Raw tags ---------------------------------------------------------
    tags: dict[str, str] = Field(
        description="Complete OSM tag set",
        default_factory=dict,
    )

    # --- Quality ----------------------------------------------------------
    # Constrained to the closed interval [0.0, 1.0] when provided.
    importance: float | None = Field(
        description="Feature importance score",
        default=None,
        ge=0.0,
        le=1.0,
    )
class OSMUnnamedCounts(BaseModel):
    """Tally of unnamed features for a single OSM tag key.

    ``value_counts`` is a two-level mapping: tag value -> element type ->
    count, as described by the field's own description string.
    """

    key: str = Field(description="OSM tag key (e.g., 'natural', 'highway')")
    # Validated as non-negative.
    total_count: int = Field(
        description="Total features with this key",
        ge=0,
    )
    value_counts: dict[str, dict[str, int]] = Field(
        description="Counts by tag value and element type: {value: {node: X, way: Y, relation: Z}}",
        default_factory=dict,
    )
class OSMQuery(BaseModel):
    """Inputs for one OSM Overpass query.

    All three fields are required. ``radius_m`` is validated to
    1..50000 and ``timeout_s`` to 1..600 (both inclusive).
    """

    center: Coordinates = Field(description="Center point of search")
    radius_m: int = Field(
        description="Search radius in meters",
        ge=1,
        le=50000,
    )
    timeout_s: int = Field(
        description="Query timeout in seconds",
        ge=1,
        le=600,
    )
class OSMFeaturesResult(BaseModel):
    """Complete result from OSM features enrichment.

    Holds the query that was run, the named features and unnamed-feature
    tallies it produced, summary counters, and provenance fields. The
    helper methods derive per-category views and a flat dictionary
    suitable for biosample enrichment.
    """

    # --- Query information ------------------------------------------------
    query: OSMQuery = Field(description="Query parameters used")

    # --- Results ----------------------------------------------------------
    named_features: list[OSMNamedFeature] = Field(
        default_factory=list, description="Named features ordered by distance"
    )
    unnamed_counts: list[OSMUnnamedCounts] = Field(
        default_factory=list, description="Counts of unnamed features by category"
    )

    # --- Summary statistics (all required, all non-negative) --------------
    total_elements: int = Field(ge=0, description="Total OSM elements found")
    named_features_count: int = Field(ge=0, description="Number of named features")
    unnamed_categories_count: int = Field(
        ge=0, description="Number of unnamed feature categories"
    )
    total_unnamed_count: int = Field(ge=0, description="Total unnamed features")

    # --- Quality and provenance -------------------------------------------
    success: bool = Field(default=True, description="Whether query succeeded")
    error_message: str | None = Field(
        default=None, description="Error details if failed"
    )
    response_time_ms: float | None = Field(
        default=None, description="Query response time in milliseconds"
    )
    data_source: str = Field(
        default="OpenStreetMap Overpass API", description="Data source"
    )
    query_timestamp: datetime = Field(
        # datetime.utcnow() is deprecated since Python 3.12. This is the
        # documented equivalent; tzinfo is stripped so the stored value stays
        # a naive UTC datetime and isoformat() output is unchanged for
        # existing consumers.
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
        description="Query execution time",
    )

    def get_features_by_category(
        self, category: FeatureCategory
    ) -> list[OSMNamedFeature]:
        """Return the named features whose ``category`` equals *category*.

        Args:
            category: Category to filter on.

        Returns:
            A new list, in the same relative order as ``named_features``;
            empty when nothing matches.
        """
        return [f for f in self.named_features if f.category == category]

    def get_nearest_feature(self, category: FeatureCategory) -> OSMNamedFeature | None:
        """Return the closest named feature of *category*.

        Features without a ``distance_km`` are ranked as infinitely far, so
        one is only returned when no feature in the category has a distance.

        Args:
            category: Category to search.

        Returns:
            The feature with the smallest ``distance_km``, or ``None`` when
            the category has no named features.
        """
        category_features = self.get_features_by_category(category)
        if not category_features:
            return None

        return min(
            category_features,
            # Explicit None test: a distance of exactly 0.0 is valid and
            # must rank first, not be mistaken for "missing".
            key=lambda f: f.distance_km if f.distance_km is not None else float("inf"),
        )

    def get_feature_counts_by_category(self) -> dict[str, int]:
        """Map each unnamed-count group's tag key to its ``total_count``.

        If two groups share a key, the later one in ``unnamed_counts`` wins.
        """
        return {group.key: group.total_count for group in self.unnamed_counts}

    def get_distance_summary(self) -> dict[str, Any]:
        """Summarise distances for a fixed set of key categories.

        For each of NATURAL, WATERWAY, HIGHWAY, AMENITY and BUILDING the
        result contains:

        * ``nearest_<cat>_km``: smallest known distance
        * ``avg_<cat>_km``: mean of the known distances
        * ``<cat>_within_1km``: number of features with distance <= 1.0

        When a category has no feature with a known distance,
        ``nearest_<cat>_km`` is ``0.0``, ``<cat>_within_1km`` is ``0`` and
        the ``avg_<cat>_km`` key is omitted. Note that ``0.0`` here is a
        "nothing found" placeholder, not a measured distance.
        """
        summary: dict[str, Any] = {}

        # Key categories for distance analysis.
        key_categories = (
            FeatureCategory.NATURAL,
            FeatureCategory.WATERWAY,
            FeatureCategory.HIGHWAY,
            FeatureCategory.AMENITY,
            FeatureCategory.BUILDING,
        )

        for category in key_categories:
            label = category.value
            # No features and "features without distances" are handled the
            # same way: both give an empty list here.
            distances = [
                f.distance_km
                for f in self.get_features_by_category(category)
                if f.distance_km is not None
            ]

            if distances:
                summary[f"nearest_{label}_km"] = min(distances)
                summary[f"avg_{label}_km"] = sum(distances) / len(distances)
                summary[f"{label}_within_1km"] = sum(1 for d in distances if d <= 1.0)
            else:
                summary[f"nearest_{label}_km"] = 0.0
                summary[f"{label}_within_1km"] = 0

        return summary

    def to_enrichment_dict(self) -> dict[str, Any]:
        """Convert to a flat dictionary suitable for biosample enrichment.

        Keys are added in this order: headline counters, the distance
        summary, nearest-feature details for NATURAL / WATERWAY / HIGHWAY /
        AMENITY (only for categories that have a named feature), one
        ``osm_<key>_count`` entry per unnamed-count group, then provenance.
        Missing names, subcategories and distances are emitted as ``""``
        and ``0.0`` rather than ``None``.
        """
        enrichment: dict[str, Any] = {
            "osm_features_found": len(self.named_features),
            "osm_categories_found": self.unnamed_categories_count,
            "osm_total_elements": self.total_elements,
            "osm_query_radius_m": self.query.radius_m,
        }

        # Add distance summary.
        enrichment.update(self.get_distance_summary())

        # Add nearest features for key categories.
        key_categories = (
            FeatureCategory.NATURAL,
            FeatureCategory.WATERWAY,
            FeatureCategory.HIGHWAY,
            FeatureCategory.AMENITY,
        )
        for category in key_categories:
            nearest = self.get_nearest_feature(category)
            if nearest is None:
                continue
            label = category.value
            enrichment[f"nearest_{label}_name"] = nearest.name or ""
            enrichment[f"nearest_{label}_type"] = nearest.subcategory or ""
            enrichment[f"nearest_{label}_distance_km"] = nearest.distance_km or 0.0

        # Add feature counts by category.
        for category_name, count in self.get_feature_counts_by_category().items():
            enrichment[f"osm_{category_name}_count"] = count

        # Provider information.
        enrichment["osm_data_source"] = self.data_source
        enrichment["osm_query_timestamp"] = self.query_timestamp.isoformat()

        return enrichment
class OSMFetchResult(BaseModel):
    """Internal envelope for one OSM Overpass API fetch.

    ``ok`` is the only required field; ``result`` and ``error`` default
    to ``None`` and ``raw`` to an empty dict.
    """

    ok: bool = Field(description="Whether fetch was successful")
    result: OSMFeaturesResult | None = Field(
        description="OSM features result",
        default=None,
    )
    error: str | None = Field(
        description="Error message if failed",
        default=None,
    )
    raw: dict[str, Any] = Field(
        description="Raw Overpass API response",
        default_factory=dict,
    )
# Google Places API Models
class GooglePlacesFeature(BaseModel):
    """One feature returned by the Google Places API.

    Only ``google_place_id`` is required; every other field has a default.
    """

    # --- Identity ---------------------------------------------------------
    google_place_id: str = Field(description="Google Place ID")
    name: str | None = Field(
        description="Feature name",
        default=None,
    )
    types: list[str] = Field(
        description="Google Place types",
        default_factory=list,
    )

    # --- Location ---------------------------------------------------------
    centroid: Coordinates | None = Field(
        description="Feature center point",
        default=None,
    )
    # Must be non-negative when provided.
    distance_km: float | None = Field(
        description="Distance from query point in kilometers",
        default=None,
        ge=0.0,
    )

    # --- Classification ---------------------------------------------------
    category: FeatureCategory = Field(
        description="Mapped feature category",
        default=FeatureCategory.OTHER,
    )
    subcategory: str | None = Field(
        description="Primary Google Place type",
        default=None,
    )

    # --- Google Places specific fields ------------------------------------
    # Validated to the closed interval [0.0, 5.0] when provided.
    rating: float | None = Field(
        description="Average rating",
        default=None,
        ge=0.0,
        le=5.0,
    )
    user_ratings_total: int | None = Field(
        description="Total number of ratings",
        default=None,
        ge=0,
    )
    price_level: int | None = Field(
        description="Price level (0-4)",
        default=None,
        ge=0,
        le=4,
    )
    business_status: str | None = Field(
        description="Business operational status",
        default=None,
    )
    vicinity: str | None = Field(
        description="Simplified address",
        default=None,
    )
    formatted_address: str | None = Field(
        description="Full formatted address",
        default=None,
    )
    icon_url: str | None = Field(
        description="URL to category icon",
        default=None,
    )
    photos: list[dict[str, Any]] = Field(
        description="Photo references",
        default_factory=list,
    )
    plus_code: dict[str, Any] | None = Field(
        description="Plus code information",
        default=None,
    )
    raw_data: dict[str, Any] = Field(
        description="Raw Google Places data",
        default_factory=dict,
    )
class GooglePlacesResult(BaseModel):
    """Result from a Google Places API query.

    ``query`` and ``radius_m`` are required. Unlike ``OSMFeaturesResult``,
    ``success`` defaults to ``False`` here.
    """

    query: Coordinates = Field(description="Query coordinates")
    radius_m: int = Field(description="Search radius in meters")

    named_features: list[GooglePlacesFeature] = Field(
        default_factory=list, description="Named features found"
    )
    unnamed_counts: list[dict[str, Any]] = Field(
        default_factory=list, description="Counts by category"
    )
    total_features: int = Field(default=0, ge=0, description="Total features found")

    success: bool = Field(default=False, description="Whether query was successful")
    provider: str = Field(default="google_places", description="Provider identifier")
    error_message: str | None = Field(
        default=None, description="Error message if failed"
    )

    def to_enrichment_dict(self) -> dict[str, Any]:
        """Convert to a flat dictionary suitable for biosample enrichment.

        Keys are added in this order: headline fields, nearest-feature
        details for NATURAL / AMENITY / HIGHWAY / BUILDING (only for
        categories that have a feature), then one
        ``google_<category>_count`` entry per category present in
        ``named_features``. Missing names, subcategories, distances and
        ratings are emitted as ``""`` and ``0.0`` rather than ``None``.
        """
        enrichment: dict[str, Any] = {
            "google_places_found": len(self.named_features),
            "google_total_features": self.total_features,
            "google_query_radius_m": self.radius_m,
            "google_enrichment_success": self.success,
        }

        # Add nearest features for key categories.
        key_categories = (
            FeatureCategory.NATURAL,
            FeatureCategory.AMENITY,
            FeatureCategory.HIGHWAY,
            FeatureCategory.BUILDING,
        )
        for category in key_categories:
            nearest = self.get_nearest_feature(category)
            if nearest is None:
                continue
            label = category.value
            enrichment[f"google_nearest_{label}_name"] = nearest.name or ""
            enrichment[f"google_nearest_{label}_type"] = nearest.subcategory or ""
            enrichment[f"google_nearest_{label}_distance_km"] = float(
                nearest.distance_km or 0.0
            )
            enrichment[f"google_nearest_{label}_rating"] = float(nearest.rating or 0.0)

        # Add feature counts by category, in first-seen order.
        category_counts: dict[str, int] = {}
        for feature in self.named_features:
            category_value = feature.category.value
            category_counts[category_value] = category_counts.get(category_value, 0) + 1

        for category_name, count in category_counts.items():
            enrichment[f"google_{category_name}_count"] = count

        return enrichment

    def get_nearest_feature(
        self, category: FeatureCategory
    ) -> GooglePlacesFeature | None:
        """Return the closest feature of *category*.

        Features without a ``distance_km`` are ranked as infinitely far.
        Ties go to the feature that appears first in ``named_features``.

        Args:
            category: Category to search.

        Returns:
            The feature with the smallest ``distance_km``, or ``None`` when
            the category has no features.
        """
        category_features = [f for f in self.named_features if f.category == category]
        if not category_features:
            return None

        return min(
            category_features,
            # Explicit None test: `distance_km or inf` would treat a valid
            # distance of 0.0 as missing and rank that feature last.
            key=lambda f: f.distance_km if f.distance_km is not None else float("inf"),
        )
class GooglePlacesFetchResult(BaseModel):
    """Envelope for one Google Places API fetch.

    ``ok`` is the only required field; ``result`` and ``error`` default
    to ``None`` and ``raw`` to an empty dict.
    """

    ok: bool = Field(description="Whether the fetch was successful")
    result: GooglePlacesResult | None = Field(
        description="Parsed result if successful",
        default=None,
    )
    error: str | None = Field(
        description="Error message if failed",
        default=None,
    )
    raw: dict[str, Any] = Field(
        description="Raw API response",
        default_factory=dict,
    )
# Combined Results Model
class CombinedFeaturesResult(BaseModel):
    """Aggregate of the results from several geographic feature providers.

    ``query`` and ``radius_m`` are required. Each provider result is
    optional and defaults to ``None``.
    """

    query: Coordinates = Field(description="Query coordinates")
    radius_m: int = Field(description="Search radius in meters")

    osm_result: OSMFeaturesResult | None = Field(
        description="OSM features result",
        default=None,
    )
    google_result: GooglePlacesResult | None = Field(
        description="Google Places result",
        default=None,
    )

    providers_successful: list[str] = Field(
        description="List of successful providers",
        default_factory=list,
    )
    providers_failed: list[str] = Field(
        description="List of failed providers",
        default_factory=list,
    )
    combined_enrichment_success: bool = Field(
        description="Whether any provider succeeded",
        default=False,
    )

    def to_enrichment_dict(self) -> dict[str, Any]:
        """Flatten the combined results into one enrichment dictionary.

        Starts with the ``features_*`` header keys, merges in the OSM
        enrichment and then the Google enrichment (each only when that
        result is present), and finally records the total number of named
        features across both providers.
        """
        merged: dict[str, Any] = {
            "features_query_radius_m": self.radius_m,
            "features_providers_successful": self.providers_successful,
            "features_providers_failed": self.providers_failed,
            "features_enrichment_success": self.combined_enrichment_success,
        }

        named_total = 0
        # OSM is merged before Google, so Google's values would win on any
        # key both providers emit.
        for provider_result in (self.osm_result, self.google_result):
            if provider_result:
                merged.update(provider_result.to_enrichment_dict())
                named_total += len(provider_result.named_features)

        merged["features_total_named_combined"] = named_total
        return merged