Coverage for src/chuck_data/commands/scan_pii.py: 0%

28 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-06-05 22:56 -0700

1""" 

2Command handler for bulk PII scanning. 

3 

4This module contains the handler for scanning all tables in a schema 

5for Personally Identifiable Information (PII). 

6""" 

7 

8import logging 

9from typing import Optional 

10 

11from ..clients.databricks import DatabricksAPIClient 

12from ..llm.client import LLMClient 

13from ..command_registry import CommandDefinition 

14from ..config import get_active_catalog, get_active_schema 

15from .base import CommandResult 

16from .pii_tools import _helper_scan_schema_for_pii_logic 

17 

18 

def handle_command(client: Optional[DatabricksAPIClient], **kwargs) -> CommandResult:
    """
    Run a bulk PII scan over every table of a single schema.

    The target catalog and schema are taken from the keyword arguments when
    given; otherwise the active catalog/schema from the configuration is used.

    Args:
        client: API client instance; the scan is refused when it is missing.
        **kwargs:
            catalog_name (str, optional): Name of the catalog to scan.
            schema_name (str, optional): Name of the schema to scan.

    Returns:
        CommandResult: A failed result when the client is missing, when no
        catalog/schema can be resolved, when the scan summary carries an
        "error" entry, or when any exception is raised. Otherwise a
        successful result whose data is the scan summary and whose message
        reports the table and PII column counts.
    """
    if not client:
        return CommandResult(False, message="Client is required for bulk PII scan.")

    requested_catalog: Optional[str] = kwargs.get("catalog_name")
    requested_schema: Optional[str] = kwargs.get("schema_name")

    try:
        # Explicit arguments win; fall back to the active configuration.
        catalog = requested_catalog or get_active_catalog()
        schema = requested_schema or get_active_schema()

        if not (catalog and schema):
            return CommandResult(
                False,
                message="Catalog and schema must be specified or active for bulk PII scan.",
            )

        # The helper needs its own LLM client in addition to the API client.
        summary = _helper_scan_schema_for_pii_logic(
            client, LLMClient(), catalog, schema
        )

        failure = summary.get("error")
        if failure:
            # Hand the full summary back so callers can inspect partial results.
            return CommandResult(False, message=failure, data=summary)

        processed = summary.get("tables_successfully_processed", 0)
        attempted = summary.get("tables_scanned_attempted", 0)
        pii_tables = summary.get("tables_with_pii", 0)
        pii_columns = summary.get("total_pii_columns", 0)
        report = (
            f"Scanned {processed}/{attempted} tables in {catalog}.{schema}. "
            f"Found {pii_tables} tables with {pii_columns} PII columns."
        )
        return CommandResult(True, data=summary, message=report)
    except Exception as exc:
        # Command boundary: log with traceback and report instead of raising.
        logging.error(f"Bulk PII scan error: {exc}", exc_info=True)
        return CommandResult(
            False, error=exc, message=f"Error during bulk PII scan: {exc!s}"
        )

67 

68 

# Registration entry for the bulk PII scan command, handled by handle_command.
DEFINITION = CommandDefinition(
    # Identity and routing.
    name="scan-schema-for-pii",
    tui_aliases=["/scan-pii"],
    handler=handle_command,
    description="Scan all tables in the current schema (or specified catalog/schema) for PII and/or customer data",
    # Both parameters are optional; the handler falls back to the active
    # catalog/schema when one is not supplied.
    required_params=[],
    parameters={
        "catalog_name": {
            "type": "string",
            "description": "Optional: Name of the catalog. If not provided, uses the active catalog",
        },
        "schema_name": {
            "type": "string",
            "description": "Optional: Name of the schema. If not provided, uses the active schema",
        },
    },
    # Visibility flags for the user and the agent.
    visible_to_user=True,
    visible_to_agent=True,
)