Coverage for src/databricks/url_utils.py: 98%
40 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-06-05 22:56 -0700
« prev ^ index » next coverage.py v7.8.0, created at 2025-06-05 22:56 -0700
1"""Utilities for handling Databricks workspace URLs."""
3from __future__ import annotations
5from typing import Optional, Tuple
6from urllib.parse import urlparse
# Base domain used for each supported cloud provider.
# Functions in this module iterate these entries in insertion order and stop
# at the first match, so the catch-all "databricks.com" entry stays last.
DATABRICKS_DOMAIN_MAP = {
    "AWS": "cloud.databricks.com",
    "Azure": "azuredatabricks.net",
    "GCP": "gcp.databricks.com",
    "Generic": "databricks.com",
}

# Inverse lookup (base domain -> provider name), used for provider detection.
DATABRICKS_DOMAINS = {
    base_domain: provider
    for provider, base_domain in DATABRICKS_DOMAIN_MAP.items()
}

# Pattern for a bare workspace identifier.  It accepts any one of:
#   * a run of ten or more digits,
#   * lowercase alphanumerics with optional interior hyphens, beginning and
#     ending with an alphanumeric character,
#   * three or more lowercase alphanumerics.
WORKSPACE_ID_PATTERN = (
    r"^("
    r"[0-9]{10,}"
    r"|[a-z0-9][a-z0-9\-]*[a-z0-9]"
    r"|[a-z0-9]{3,}"
    r")$"
)
def normalize_workspace_url(url: str) -> str:
    """Return just the workspace identifier portion of a URL.

    The input may be a full URL, a bare hostname, or a bare workspace
    identifier.  When the hostname is one of the domains in
    ``DATABRICKS_DOMAIN_MAP`` or a subdomain of one, that domain (and the
    dot in front of it) is removed; otherwise the hostname is returned as
    parsed.

    Args:
        url: Workspace URL, hostname, or identifier.  Surrounding whitespace
            is ignored.

    Returns:
        The hostname with any known Databricks base domain stripped, or an
        empty string when ``url`` is empty or has no hostname.

    Raises:
        ValueError: Propagated from ``urlparse`` when the network location
            is malformed (for example unbalanced IPv6 brackets).
    """
    if not url:
        return ""

    candidate = url.strip()
    if not candidate:
        return ""

    # urlparse only fills in the hostname when a scheme separator is present.
    to_parse = candidate if "://" in candidate else f"https://{candidate}"
    host = urlparse(to_parse).hostname or ""

    for domain in DATABRICKS_DOMAIN_MAP.values():
        if host == domain:
            # A bare base domain carries no workspace identifier.
            return ""
        suffix = f".{domain}"
        # Require a label boundary so that e.g. "notcloud.databricks.com" is
        # not mistaken for a subdomain of "cloud.databricks.com".
        if host.endswith(suffix):
            return host[: -len(suffix)]

    return host
def validate_workspace_url(url: str) -> Tuple[bool, Optional[str]]:
    """Validate that ``url`` is a plausible Databricks workspace URL.

    This is a lightweight sanity check only; it does not verify that the
    workspace exists or that the value is a well-formed URL.

    Args:
        url: Candidate workspace URL or identifier.  Leading and trailing
            whitespace is ignored.

    Returns:
        ``(True, None)`` when the value looks reasonable, otherwise
        ``(False, message)`` where ``message`` describes the problem.
    """
    # Falsy values (including None and "") are reported as empty before the
    # type check, so callers passing None get the "empty" message.
    if not url:
        return False, "Workspace URL cannot be empty"

    if not isinstance(url, str):
        return False, "Workspace URL must be a string"

    url_clean = url.strip()

    # A whitespace-only input becomes empty after stripping and fails here.
    if not 1 <= len(url_clean) <= 200:
        return False, "Workspace URL should be between 1-200 characters"

    # Reject any embedded whitespace (spaces, tabs, newlines, ...), not only
    # the plain space character.
    if any(ch.isspace() for ch in url_clean):
        return False, "Workspace URL cannot contain spaces"

    return True, None
def get_full_workspace_url(workspace_id: str, cloud_provider: str = "AWS") -> str:
    """Build the ``https://`` URL of a workspace from its identifier.

    Args:
        workspace_id: Identifier placed in front of the provider's domain.
        cloud_provider: Key into ``DATABRICKS_DOMAIN_MAP``.  A key that is
            not in the map falls back to the AWS domain.

    Returns:
        ``https://<workspace_id>.<base domain>``.
    """
    aws_domain = DATABRICKS_DOMAIN_MAP["AWS"]
    base_domain = DATABRICKS_DOMAIN_MAP.get(cloud_provider, aws_domain)
    return f"https://{workspace_id}.{base_domain}"
def detect_cloud_provider(url: str) -> str:
    """Infer the cloud provider from the hostname of ``url``.

    The hostname is extracted the same way ``normalize_workspace_url`` does
    it (``https://`` is assumed when no scheme is present) and compared
    against the domains in ``DATABRICKS_DOMAINS``.  Only the hostname is
    inspected, so a Databricks domain that merely appears in the path or
    query string does not count, and matching is case-insensitive because
    ``urlparse`` lowercases the hostname.

    Args:
        url: Workspace URL or hostname.  Surrounding whitespace is ignored.

    Returns:
        The provider name for the first matching domain, or ``"AWS"`` when
        ``url`` is empty, cannot be parsed, or matches no known domain.
    """
    if not url:
        return "AWS"  # Default to AWS if no URL provided

    candidate = url.strip()
    to_parse = candidate if "://" in candidate else f"https://{candidate}"
    try:
        host = urlparse(to_parse).hostname or ""
    except ValueError:
        # Malformed network location: keep the default rather than raising,
        # so that unrecognised input still yields "AWS".
        host = ""

    for domain, provider in DATABRICKS_DOMAINS.items():
        # Match the domain itself or a true subdomain (label boundary), so
        # "evilazuredatabricks.net" is not treated as Azure.
        if host == domain or host.endswith(f".{domain}"):
            return provider
    return "AWS"
def format_workspace_url_for_display(
    workspace_id: str, cloud_provider: str = "AWS"
) -> str:
    """Return the workspace URL string to show to users.

    This delegates to ``get_full_workspace_url`` and returns its result
    unchanged.
    """
    display_url = get_full_workspace_url(workspace_id, cloud_provider)
    return display_url