Source code for biosample_enricher.forward_geocoding.service

"""Forward geocoding service for coordinating multiple providers (place names to coordinates)."""

from typing import Any

from biosample_enricher.forward_geocoding.models import ForwardGeocodeResult
from biosample_enricher.forward_geocoding.providers.base import ForwardGeocodingProvider
from biosample_enricher.forward_geocoding.providers.google import (
    GoogleForwardGeocodingProvider,
)
from biosample_enricher.forward_geocoding.providers.osm import (
    OSMForwardGeocodingProvider,
)
from biosample_enricher.logging_config import get_logger

# Module-level logger obtained from the project's logging configuration helper.
logger = get_logger(__name__)


class ForwardGeocodingService:
    """Service for managing forward geocoding providers (place names to coordinates).

    Providers are kept in ``self.providers``, keyed by a short name ("osm",
    "google"). The service picks a provider, falls back to another one when the
    requested provider reports itself unavailable, and converts provider
    failures into ``None`` / empty results instead of raising.
    """

    # Order in which registered providers are preferred for auto-selection and
    # for fallback (Google first because it is treated as the more accurate one).
    _PREFERRED_PROVIDERS = ("google", "osm")

    def __init__(self) -> None:
        """Initialize the forward geocoding service and register its providers."""
        self.providers: dict[str, ForwardGeocodingProvider] = {}
        self._initialize_providers()

    def _initialize_providers(self) -> None:
        """Initialize available forward geocoding providers.

        A provider whose constructor raises is logged and left out of
        ``self.providers``; construction failures never propagate.
        """
        # Initialize OSM provider (always available)
        try:
            osm_provider = OSMForwardGeocodingProvider()
            self.providers["osm"] = osm_provider
            logger.info("Initialized OSM forward geocoding provider")
        except Exception as e:
            logger.error(f"Failed to initialize OSM provider: {e}")

        # Initialize Google provider if API key is available
        try:
            google_provider = GoogleForwardGeocodingProvider()
            self.providers["google"] = google_provider
            logger.info("Initialized Google forward geocoding provider")
        except ValueError as e:
            # A ValueError is logged as "not available" (warning) rather than
            # as an initialization failure (error).
            logger.warning(f"Google provider not available: {e}")
        except Exception as e:
            logger.error(f"Failed to initialize Google provider: {e}")

    def get_available_providers(self) -> list[str]:
        """Get list of registered provider names, in registration order."""
        return list(self.providers)

    def get_provider(self, name: str) -> ForwardGeocodingProvider | None:
        """Get a specific provider by name, or None if it is not registered."""
        return self.providers.get(name)

    def get_provider_status(self) -> dict[str, dict[str, Any]]:
        """Get status information for all providers.

        Returns:
            Mapping of provider key to a dict with "name", "available",
            "attribution" and "error". If the availability check raises, the
            provider is reported as unavailable and "error" holds the message.
        """
        status: dict[str, dict[str, Any]] = {}
        for name, provider in self.providers.items():
            try:
                available = provider.is_available()
                status[name] = {
                    "name": provider.name,
                    "available": available,
                    "attribution": provider.attribution,
                    "error": None,
                }
            except Exception as e:
                # getattr with defaults: the provider may be broken enough that
                # even its descriptive attributes are missing.
                status[name] = {
                    "name": getattr(provider, "name", name),
                    "available": False,
                    "attribution": getattr(provider, "attribution", None),
                    "error": str(e),
                }
        return status

    def geocode(
        self,
        query: str,
        provider: str | None = None,
        *,
        read_from_cache: bool = True,
        write_to_cache: bool = True,
        timeout_s: float = 30.0,
        language: str = "en",
        country_codes: list[str] | None = None,
        max_results: int = 10,
    ) -> ForwardGeocodeResult | None:
        """
        Perform forward geocoding to convert place name to coordinates.

        Provider problems (no provider available, failing availability check,
        failing search) are logged and reported as ``None``; they do not raise.

        Args:
            query: Place name or address to search for
            provider: Provider name (None for auto-selection)
            read_from_cache: Whether to read from cache
            write_to_cache: Whether to write to cache
            timeout_s: Request timeout in seconds
            language: Language code for results
            country_codes: List of ISO country codes to restrict search
            max_results: Maximum number of results

        Returns:
            Forward geocoding result or None if failed
        """
        if not query or not query.strip():
            logger.warning("Empty geocoding query provided")
            return None

        # Auto-select provider if not specified
        auto_selected = provider is None
        if provider is None:
            try:
                provider = self._select_best_provider()
            except RuntimeError:
                # Honour the "None if failed" contract instead of leaking the
                # selection error to the caller.
                logger.error("No available providers for forward geocoding")
                return None

        if provider not in self.providers:
            logger.error(f"Provider '{provider}' not available")
            return None

        # An auto-selected provider has just passed its availability check, so
        # only explicitly requested providers are checked here.
        if not auto_selected and not self._is_provider_available(provider):
            logger.warning(f"Provider '{provider}' is not available, trying fallback")

            # Try fallback provider
            fallback_provider = self._get_fallback_provider(provider)
            if fallback_provider and fallback_provider in self.providers:
                provider = fallback_provider
            else:
                logger.error("No available providers for forward geocoding")
                return None

        geocoding_provider = self.providers[provider]

        try:
            logger.info(f"Forward geocoding '{query}' using {provider}")

            # Perform search
            fetch_result = geocoding_provider.search(
                query,
                _read_from_cache=read_from_cache,
                _write_to_cache=write_to_cache,
                timeout_s=timeout_s,
                language=language,
                country_codes=country_codes,
                max_results=max_results,
            )

            if not fetch_result.ok:
                logger.error(f"Forward geocoding failed: {fetch_result.error}")
                return None

            return fetch_result.result

        except Exception as e:
            logger.error(f"Forward geocoding error with {provider}: {e}")
            return None

    def geocode_multiple(
        self,
        query: str,
        providers: list[str] | None = None,
        *,
        read_from_cache: bool = True,
        write_to_cache: bool = True,
        timeout_s: float = 30.0,
        language: str = "en",
        country_codes: list[str] | None = None,
        max_results: int = 5,
    ) -> dict[str, ForwardGeocodeResult]:
        """
        Perform forward geocoding using multiple providers for comparison.

        Args:
            query: Place name or address to search for
            providers: List of provider names (None for all available)
            read_from_cache: Whether to read from cache
            write_to_cache: Whether to write to cache
            timeout_s: Request timeout in seconds
            language: Language code for results
            country_codes: List of ISO country codes to restrict search
            max_results: Maximum results per provider

        Returns:
            Dictionary mapping provider names to results; providers that are
            unknown, fail, or return nothing are omitted.
        """
        if providers is None:
            providers = self.get_available_providers()

        results: dict[str, ForwardGeocodeResult] = {}

        for provider_name in providers:
            if provider_name not in self.providers:
                logger.warning(f"Provider '{provider_name}' not available")
                continue

            try:
                result = self.geocode(
                    query,
                    provider=provider_name,
                    read_from_cache=read_from_cache,
                    write_to_cache=write_to_cache,
                    timeout_s=timeout_s,
                    language=language,
                    country_codes=country_codes,
                    max_results=max_results,
                )
                if result:
                    results[provider_name] = result
            except Exception as e:
                logger.error(f"Provider {provider_name} failed: {e}")
                continue

        return results

    def get_coordinates_for_place(
        self,
        place_name: str,
        prefer_provider: str | None = None,
        language: str = "en",
        country_hint: str | None = None,
    ) -> dict[str, Any]:
        """
        Get coordinates and enrichment data for a biosample place name.

        This is the main method for biosample enrichment - converts place names
        from metadata into precise coordinates.

        Args:
            place_name: Name of place/location from biosample metadata
            prefer_provider: Preferred provider name
            language: Language code for results
            country_hint: ISO country code hint for better results

        Returns:
            Dictionary with coordinates and administrative information. Empty
            for a blank place name. When no provider yields a location, only
            "providers_attempted", "providers_successful" (empty) and "errors"
            are present.
        """
        if not place_name or not place_name.strip():
            return {}

        country_codes = [country_hint] if country_hint else None

        # Preferred provider first (if registered), then the remaining
        # well-known providers as fallbacks.
        providers_to_try: list[str] = []
        if prefer_provider and prefer_provider in self.providers:
            providers_to_try.append(prefer_provider)
        providers_to_try.extend(
            name
            for name in self._PREFERRED_PROVIDERS
            if name != prefer_provider and name in self.providers
        )

        # NOTE: geocode() reports most failures by returning None, so this list
        # only captures exceptions that actually propagate out of it.
        errors: list[str] = []

        for provider_name in providers_to_try:
            try:
                result = self.geocode(
                    place_name,
                    provider=provider_name,
                    language=language,
                    country_codes=country_codes,
                    max_results=1,  # Just need the best match
                )

                if result and result.locations:
                    # Get enrichment data from best match
                    enrichment_data = result.to_enrichment_dict()
                    enrichment_data["providers_attempted"] = providers_to_try
                    enrichment_data["providers_successful"] = [provider_name]

                    logger.info(
                        f"Successfully geocoded '{place_name}' using {provider_name}"
                    )
                    return enrichment_data

            except Exception as e:
                errors.append(f"{provider_name}: {e}")
                logger.warning(
                    f"Provider {provider_name} failed for '{place_name}': {e}"
                )
                continue

        # No successful geocoding
        logger.warning(f"Failed to geocode '{place_name}' with any provider")
        return {
            "providers_attempted": providers_to_try,
            "providers_successful": [],
            "errors": errors,
        }

    def _is_provider_available(self, name: str) -> bool:
        """Return True if provider ``name`` is registered and reports itself available.

        An exception raised by the provider's availability check is logged and
        treated as "not available".
        """
        provider = self.providers.get(name)
        if provider is None:
            return False
        try:
            return bool(provider.is_available())
        except Exception as e:
            logger.warning(f"Availability check for provider '{name}' failed: {e}")
            return False

    def _select_best_provider(self) -> str:
        """Select the best available provider.

        Returns:
            Name of the first available provider, trying the preferred ones
            (Google, then OSM) before any other registered provider.

        Raises:
            RuntimeError: If no registered provider is available.
        """
        # Prefer Google if available (more accurate), fallback to OSM
        for name in self._PREFERRED_PROVIDERS:
            if self._is_provider_available(name):
                return name

        # Return first available provider as last resort; the preferred ones
        # were already checked above.
        for name in self.providers:
            if name not in self._PREFERRED_PROVIDERS and self._is_provider_available(
                name
            ):
                return name

        raise RuntimeError("No forward geocoding providers available")

    def _get_fallback_provider(self, primary_provider: str) -> str | None:
        """Get fallback provider if primary fails.

        Google and OSM fall back to each other (the caller must still check
        that the returned name is registered). Any other provider falls back
        to the first registered preferred provider, or None if there is none.
        """
        if primary_provider == "google":
            return "osm"
        if primary_provider == "osm":
            return "google"

        # For any other provider, try google first, then osm
        for name in self._PREFERRED_PROVIDERS:
            if name in self.providers:
                return name
        return None