"""OSM geographic features enrichment service."""
from typing import Any
from biosample_enricher.logging_config import get_logger
from biosample_enricher.osm_features.google_provider import GooglePlacesProvider
from biosample_enricher.osm_features.models import (
CombinedFeaturesResult,
Coordinates,
OSMFeaturesResult,
)
from biosample_enricher.osm_features.provider import OSMOverpassProvider
# Module-level logger, named after this module, used by the service for all log output.
logger = get_logger(__name__)
[docs]
class OSMFeaturesService:
"""Service for enriching locations with geographic features from multiple providers."""
[docs]
def __init__(self, default_radius_m: int = 1000, enable_google: bool = True):
"""
Initialize geographic features service.
Args:
default_radius_m: Default search radius in meters
enable_google: Whether to enable Google Places provider if available
"""
self.default_radius_m = default_radius_m
self.osm_provider = OSMOverpassProvider()
# Initialize Google provider if enabled
self.google_provider = None
if enable_google:
try:
self.google_provider = GooglePlacesProvider()
if not self.google_provider.is_available():
logger.info("Google Places API not available, using OSM only")
self.google_provider = None
except Exception as e:
logger.warning(f"Failed to initialize Google Places provider: {e}")
self.google_provider = None
[docs]
def get_combined_features_for_location(
self,
latitude: float,
longitude: float,
radius_m: int | None = None,
timeout_s: int = 180,
) -> CombinedFeaturesResult:
"""
Get geographic features from both OSM and Google Places providers.
Args:
latitude: Latitude coordinate
longitude: Longitude coordinate
radius_m: Search radius in meters (uses default if None)
timeout_s: Query timeout in seconds
Returns:
Combined features result from both providers
"""
if radius_m is None:
radius_m = self.default_radius_m
logger.info(
f"Getting combined features for {latitude}, {longitude} within {radius_m}m"
)
query_coords = Coordinates(latitude=latitude, longitude=longitude)
providers_successful = []
providers_failed = []
osm_result = None
google_result = None
# Query OSM provider
try:
osm_fetch = self.osm_provider.get_features(
latitude=latitude,
longitude=longitude,
radius_m=radius_m,
timeout_s=timeout_s,
)
if osm_fetch.ok and osm_fetch.result:
osm_result = osm_fetch.result
providers_successful.append("osm")
logger.info(
f"OSM: Found {len(osm_result.named_features)} named features"
)
else:
providers_failed.append("osm")
logger.warning(f"OSM provider failed: {osm_fetch.error}")
except Exception as e:
providers_failed.append("osm")
logger.error(f"OSM provider error: {e}")
# Query Google Places provider if available
if self.google_provider:
try:
google_fetch = self.google_provider.get_features(
latitude=latitude,
longitude=longitude,
radius_m=radius_m,
timeout_s=timeout_s,
)
if google_fetch.ok and google_fetch.result:
google_result = google_fetch.result
providers_successful.append("google_places")
logger.info(
f"Google Places: Found {len(google_result.named_features)} named features"
)
else:
providers_failed.append("google_places")
logger.warning(
f"Google Places provider failed: {google_fetch.error}"
)
except Exception as e:
providers_failed.append("google_places")
logger.error(f"Google Places provider error: {e}")
else:
logger.debug("Google Places provider not available")
# Create combined result
combined_success = len(providers_successful) > 0
return CombinedFeaturesResult(
query=query_coords,
radius_m=radius_m,
osm_result=osm_result,
google_result=google_result,
providers_successful=providers_successful,
providers_failed=providers_failed,
combined_enrichment_success=combined_success,
)
[docs]
def get_features_for_location(
self,
latitude: float,
longitude: float,
radius_m: int | None = None,
timeout_s: int = 180,
) -> OSMFeaturesResult | None:
"""
Get geographic features around a location from OSM only (for backward compatibility).
Args:
latitude: Latitude coordinate
longitude: Longitude coordinate
radius_m: Search radius in meters (uses default if None)
timeout_s: Query timeout in seconds
Returns:
OSM features result or None if failed
"""
if radius_m is None:
radius_m = self.default_radius_m
logger.info(
f"Getting OSM features for {latitude}, {longitude} within {radius_m}m"
)
# Check provider availability
if not self.osm_provider.is_available():
logger.warning("OSM Overpass API is not available")
return None
# Get features from provider
fetch_result = self.osm_provider.get_features(
latitude=latitude,
longitude=longitude,
radius_m=radius_m,
timeout_s=timeout_s,
)
if not fetch_result.ok:
logger.error(f"OSM features enrichment failed: {fetch_result.error}")
return None
result = fetch_result.result
if result:
logger.info(
f"Found {result.named_features_count} named features and "
f"{result.unnamed_categories_count} categories of unnamed features"
)
# Log some key findings
if result.named_features:
nearest = result.named_features[0]
logger.info(
f"Nearest named feature: {nearest.name} ({nearest.category.value}) "
f"at {nearest.distance_km:.3f}km"
)
return result
[docs]
def enrich_biosample_location(
self,
latitude: float,
longitude: float,
radius_m: int | None = None,
timeout_s: int = 180,
use_combined: bool = True,
) -> dict[str, Any]:
"""
Enrich a biosample location with geographic features from multiple providers.
Args:
latitude: Latitude coordinate
longitude: Longitude coordinate
radius_m: Search radius in meters (uses default if None)
timeout_s: Query timeout in seconds
use_combined: Whether to use combined providers (True) or OSM only (False)
Returns:
Dictionary suitable for biosample enrichment
"""
if radius_m is None:
radius_m = self.default_radius_m
if use_combined:
# Use combined features from both providers
combined_result = self.get_combined_features_for_location(
latitude=latitude,
longitude=longitude,
radius_m=radius_m,
timeout_s=timeout_s,
)
if not combined_result.combined_enrichment_success:
return {
"features_enrichment_success": False,
"features_error": "All geographic feature providers failed",
"features_providers_failed": combined_result.providers_failed,
}
# Convert to enrichment dictionary
enrichment = combined_result.to_enrichment_dict()
return enrichment
else:
# Use OSM only for backward compatibility
osm_result = self.get_features_for_location(
latitude=latitude,
longitude=longitude,
radius_m=radius_m,
timeout_s=timeout_s,
)
if not osm_result:
return {
"osm_features_found": 0,
"osm_enrichment_success": False,
"osm_error": "Failed to retrieve OSM features",
}
# Convert to enrichment dictionary
enrichment = osm_result.to_enrichment_dict()
enrichment["osm_enrichment_success"] = True
return enrichment
[docs]
def get_features_for_biosample(
self, biosample: dict[str, Any], radius_m: int = 1000, timeout_s: int = 180
) -> dict[str, Any]:
"""
Get OSM features for a biosample dictionary.
Args:
biosample: Biosample data dictionary
radius_m: Search radius in meters
timeout_s: Query timeout in seconds
Returns:
Enrichment result dictionary
"""
# Extract coordinates from biosample
coords = self._extract_coordinates(biosample)
if not coords:
return {
"osm_features_found": 0,
"osm_enrichment_success": False,
"osm_error": "No coordinates available",
}
latitude, longitude = coords
return self.enrich_biosample_location(
latitude=latitude,
longitude=longitude,
radius_m=radius_m,
timeout_s=timeout_s,
)
[docs]
def get_provider_status(self) -> dict[str, Any]:
"""Get status of all geographic features providers."""
status = {}
# OSM Overpass API status
osm_available = self.osm_provider.is_available()
status["osm_overpass"] = {
"name": "OpenStreetMap Overpass API",
"available": osm_available,
"attribution": "© OpenStreetMap contributors",
"base_url": self.osm_provider.base_url,
"rate_limit": "1 request per second",
"error": None if osm_available else "Service not responding",
}
# Google Places API status
if self.google_provider:
google_available = self.google_provider.is_available()
status["google_places"] = {
"name": "Google Places API",
"available": google_available,
"attribution": "Powered by Google",
"base_url": self.google_provider.base_url,
"rate_limit": "1000 requests per day (free tier)",
"error": None
if google_available
else "API key invalid or quota exceeded",
}
else:
status["google_places"] = {
"name": "Google Places API",
"available": False,
"attribution": "Powered by Google",
"base_url": "https://maps.googleapis.com/maps/api/place/nearbysearch/json",
"rate_limit": "1000 requests per day (free tier)",
"error": "API key not provided or initialization failed",
}
return status
def _extract_coordinates(
self, biosample: dict[str, Any]
) -> tuple[float, float] | None:
"""Extract coordinates from biosample data."""
# Try different coordinate field patterns
coordinate_patterns = [
# Direct lat/lon fields
("latitude", "longitude"),
("lat", "lon"),
("lat", "lng"),
# Nested location objects
("location.latitude", "location.longitude"),
("coordinates.latitude", "coordinates.longitude"),
("geo_loc.latitude", "geo_loc.longitude"),
# Array format [lat, lon]
("coordinates",),
]
for pattern in coordinate_patterns:
if len(pattern) == 2:
lat_field, lon_field = pattern
lat = self._get_nested_value(biosample, lat_field)
lon = self._get_nested_value(biosample, lon_field)
if lat is not None and lon is not None:
try:
lat_val = float(lat)
lon_val = float(lon)
if -90 <= lat_val <= 90 and -180 <= lon_val <= 180:
return lat_val, lon_val
except (ValueError, TypeError):
continue
elif len(pattern) == 1:
coords_field = pattern[0]
coords = self._get_nested_value(biosample, coords_field)
if isinstance(coords, list | tuple) and len(coords) >= 2:
try:
lat_val = float(coords[0])
lon_val = float(coords[1])
if -90 <= lat_val <= 90 and -180 <= lon_val <= 180:
return lat_val, lon_val
except (ValueError, TypeError, IndexError):
continue
return None
def _get_nested_value(self, data: dict[str, Any], field_path: str) -> Any:
"""Get value from nested dictionary using dot notation."""
keys = field_path.split(".")
value = data
for key in keys:
if isinstance(value, dict) and key in value:
value = value[key]
else:
return None
return value
[docs]
def batch_enrich_locations(
self,
locations: list[tuple[float, float]],
radius_m: int = 1000,
timeout_s: int = 180,
) -> list[dict[str, Any]]:
"""
Batch enrich multiple locations with OSM features.
Args:
locations: List of (latitude, longitude) tuples
radius_m: Search radius in meters
timeout_s: Query timeout in seconds per location
Returns:
List of enrichment dictionaries
"""
results = []
for i, (lat, lon) in enumerate(locations):
if i % 10 == 0:
logger.info(f"Processing location {i + 1}/{len(locations)}")
try:
enrichment = self.enrich_biosample_location(
latitude=lat,
longitude=lon,
radius_m=radius_m,
timeout_s=timeout_s,
)
enrichment["location_index"] = i
results.append(enrichment)
except Exception as e:
logger.error(f"Error enriching location {i}: {e}")
results.append(
{
"location_index": i,
"osm_features_found": 0,
"osm_enrichment_success": False,
"osm_error": str(e),
}
)
return results