Coverage for src/commands/scan_pii.py: 100%
28 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-06-05 22:56 -0700
« prev ^ index » next coverage.py v7.8.0, created at 2025-06-05 22:56 -0700
1"""
2Command handler for bulk PII scanning.
4This module contains the handler for scanning all tables in a schema
5for Personally Identifiable Information (PII).
6"""
8import logging
9from typing import Optional
11from src.clients.databricks import DatabricksAPIClient
12from src.llm.client import LLMClient
13from src.command_registry import CommandDefinition
14from src.config import get_active_catalog, get_active_schema
15from .base import CommandResult
16from .pii_tools import _helper_scan_schema_for_pii_logic
def handle_command(client: Optional[DatabricksAPIClient], **kwargs) -> CommandResult:
    """
    Run a bulk PII scan over every table of one catalog/schema pair.

    An explicitly passed ``catalog_name`` / ``schema_name`` takes precedence;
    whenever one of them is missing or empty, the active catalog / schema
    reported by ``src.config`` is used instead.

    Args:
        client: API client instance. The scan is refused when it is falsy.
        **kwargs:
            catalog_name (str, optional): Name of the catalog
            schema_name (str, optional): Name of the schema

    Returns:
        CommandResult: a failed result when the client is missing, when no
        catalog/schema can be resolved, when the scan summary carries a
        truthy ``"error"`` entry, or when any exception is raised along the
        way. Otherwise a successful result whose ``data`` is the scan
        summary and whose ``message`` is a one-line recap of the counts.
    """
    requested_catalog: Optional[str] = kwargs.get("catalog_name")
    requested_schema: Optional[str] = kwargs.get("schema_name")

    if not client:
        return CommandResult(False, message="Client is required for bulk PII scan.")

    try:
        # Both fallbacks are resolved before validating, so a missing
        # catalog does not short-circuit the schema lookup.
        catalog = requested_catalog or get_active_catalog()
        schema = requested_schema or get_active_schema()

        if not (catalog and schema):
            return CommandResult(
                False,
                message="Catalog and schema must be specified or active for bulk PII scan.",
            )

        # The helper needs its own LLM client in addition to the API client.
        llm = LLMClient()
        summary = _helper_scan_schema_for_pii_logic(client, llm, catalog, schema)

        if summary.get("error"):
            # Hand the whole summary back so callers can inspect partial results.
            return CommandResult(False, message=summary["error"], data=summary)

        # Missing counters are reported as 0 rather than failing the command.
        processed = summary.get("tables_successfully_processed", 0)
        attempted = summary.get("tables_scanned_attempted", 0)
        tables_with_pii = summary.get("tables_with_pii", 0)
        pii_columns = summary.get("total_pii_columns", 0)

        recap = (
            f"Scanned {processed}/{attempted} tables in {catalog}.{schema}. "
            f"Found {tables_with_pii} tables with {pii_columns} PII columns."
        )
        return CommandResult(True, data=summary, message=recap)
    except Exception as exc:
        # Command boundary: log with traceback and report the failure as a
        # result object instead of letting the exception propagate.
        logging.error(f"Bulk PII scan error: {exc}", exc_info=True)
        return CommandResult(
            False, error=exc, message=f"Error during bulk PII scan: {str(exc)}"
        )
# Registration record for the bulk PII scan command, handled by handle_command.
DEFINITION = CommandDefinition(
    name="scan-schema-for-pii",
    description="Scan all tables in the current schema (or specified catalog/schema) for PII and/or customer data",
    handler=handle_command,
    # Both parameters are optional: the handler falls back to the active
    # catalog/schema when they are omitted, hence the empty required list.
    parameters={
        "catalog_name": {
            "type": "string",
            "description": "Optional: Name of the catalog. If not provided, uses the active catalog",
        },
        "schema_name": {
            "type": "string",
            "description": "Optional: Name of the schema. If not provided, uses the active schema",
        },
    },
    required_params=[],
    tui_aliases=["/scan-pii"],
    visible_to_user=True,
    visible_to_agent=True,
)