Module shaystack.providers.db_mysql
Save Haystack ontology in a MySQL database using a JSON column. Convert the haystack filter to the equivalent MySQL SQL syntax.
Expand source code
# -*- coding: utf-8 -*-
# MySQL db driver
# See the accompanying LICENSE file.
# (C) 2021 Engie Digital
#
# vim: set ts=4 sts=4 et tw=78 sw=4 si:
"""
Save Haystack ontology in a MySQL database using a JSON column.
Convert the haystack filter to the equivalent MySQL SQL syntax.
"""
import itertools
import json
import logging
import textwrap
from dataclasses import dataclass
from datetime import datetime, date, time
from typing import Dict, Any, Optional, List, Tuple, Union, Iterator, Callable, cast
import pytz
from .sqldb_protocol import DBCursor
from .. import parse_filter, jsondumper, Quantity, Ref
from ..filter_ast import FilterNode, FilterUnary, FilterBinary, FilterPath
log = logging.getLogger("db.Provider")
_map_operator = \
{"==": "=",
"!=": "!="
}
def _use_inner_join(node: FilterNode) -> bool:
    """ Tell whether the filter tree under `node` needs an inner join.

    A unary node needs one when its path has more than one segment. A binary
    node needs one when any of its two children does, except when its left
    child is a `_FilterDate`: such a node never reports a join.

    Args:
        node: The filter node to inspect.
    Returns:
        True if the tree must use inner join
    """
    if isinstance(node, FilterUnary):
        segments = cast(FilterPath, node.right).paths
        return len(segments) > 1
    if not isinstance(node, FilterBinary):
        return False
    if isinstance(node.left, _FilterDate):
        return False
    # `any` stops at the first child that needs a join, like `or` would.
    return any(_use_inner_join(child) for child in (node.left, node.right))
def _generate_path(table_name: str,
                   customer_id: str,
                   version: datetime,
                   select: List[Union[str, List[Any]]],
                   where: List[Union[str, List[Any]]],
                   node: FilterPath,
                   num_table: int) -> Tuple[int, List[Union[str, List[Any]]], List[Union[str, List[Any]]]]:
    """ Add one INNER JOIN per dereferenced segment of a path.

    A path with a single segment needs no join and leaves everything
    untouched. Otherwise every segment but the last one allocates a new table
    alias `tN`, joined on the id referenced by the previous table.

    Args:
        table_name: The table to join.
        customer_id: The customer id copied in each join condition.
        version: The version date copied in each join condition.
        select: The SELECT fragments, extended in place.
        where: The pending conditions.
        node: The path to follow.
        num_table: The index of the current table alias.
    Returns:
        The index of the last table alias, the SELECT fragments and the
        pending conditions.
    """
    if len(node.paths) == 1:
        return num_table, select, where

    for position, segment in enumerate(node.paths[:-1]):
        num_table += 1
        join_clause = f"INNER JOIN {table_name} AS t{num_table} ON\n"
        if position:
            # Flush the conditions of the previous join (closing its open
            # parenthesis) before starting the next one.
            select.append("".join(_flatten(where)) + ")\n")
            select.append(join_clause)
            where = ['(']
        else:
            select.append(join_clause)
        where.extend(
            ['(',
             _select_version(version, num_table),
             f"AND t{num_table}.customer_id='{customer_id}'\n",
             f"AND t{num_table - 1}.entity->'$.{segment}' = "
             f"t{num_table}.entity->'$.id')\n"
             ])

    # The caller appends the test on the last segment right after this.
    where.append("AND ")
    return num_table, select, where
def _typed_comparison_sql(column: str, operator: str, value: Any) -> str:
    """ Return the SQL test comparing a typed tag with `value`.

    Used for the operators other than `==` and `!=`. The type header of the
    stored value is removed with SUBSTR before the comparison.

    Args:
        column: The SQL expression selecting the tag (`tN.entity->'$.tag'`).
        operator: The comparison operator, copied verbatim in the SQL.
        value: The value to compare with: number, time, datetime, date or str.
    Returns:
        The SQL fragment, ended by a new line.
    """
    if isinstance(value, (int, float)):
        # Comparison with numbers. Must remove the header 'n:'
        # NOTE(review): the other types skip 3 characters (offset 4) while
        # numbers skip 2 (offset 3) - confirm against the stored format.
        return f"CAST(SUBSTR({column},3) AS REAL) {operator} {value}\n"
    if isinstance(value, time):
        return f"CAST(SUBSTR({column},4,8) AS TIME) {operator} CAST('{value}' AS TIME)\n"
    # `datetime` is tested before `date` because it is a subclass of `date`.
    if isinstance(value, datetime):
        # NOTE(review): only 10 characters are kept here and 25 for a plain
        # date - the two lengths look swapped, confirm before changing.
        return f"CAST(SUBSTR({column},4,10) AS DATETIME) {operator} CAST('{value}' AS DATETIME)\n"
    if isinstance(value, date):
        return f"CAST(SUBSTR({column},4,25) AS DATE) {operator} CAST('{value}' AS DATE)\n"
    # Strings: the kept length is computed from the stored value itself
    # (it was previously computed from the '$.tag' path literal).
    return f"(SUBSTR({column},4,LENGTH({column})-4) {operator} '{value}')\n"


def _generate_filter_in_sql(table_name: str,
                            customer_id: str,
                            version: datetime,
                            select: List[Union[str, List[Any]]],
                            where: List[Union[str, List[Any]]],
                            node: FilterNode,
                            num_table: int
                            ) -> Tuple[int, List[Union[str, List[Any]]], List[Union[str, List[Any]]]]:
    """ Convert a filter node, and recursively its children, to SQL fragments.

    NOTE(review): the tag names, the customer id and the compared values are
    copied in the SQL text without escaping - confirm that the callers only
    pass trusted or validated filters.

    Args:
        table_name: The table to query.
        customer_id: The customer id used by the joins and the sub-requests.
        version: The version date used by the joins and the sub-requests.
        select: The SELECT fragments generated so far.
        where: The conditions generated so far.
        node: The filter node to convert.
        num_table: The index of the current table alias `tN`.
    Returns:
        The index of the last table alias used, the SELECT fragments and the
        conditions.
    Raises:
        AssertionError: if the node or its operator is not supported.
    """
    # Use RootBlock nodes
    if isinstance(node, _FilterDate):
        where.extend(
            ["(",
             _select_version(node.version, node.num_table),
             f"AND t{node.num_table}.customer_id='{node.customer_id}')\n"
             ])
    elif isinstance(node, FilterUnary):
        if node.operator == "has":
            assert isinstance(node.right, FilterPath)
            num_table, select, where = \
                _generate_path(table_name, customer_id, version,
                               select, where,
                               node.right,
                               num_table)
            where.append(f"t{num_table}.entity->'$.{node.right.paths[-1]}' IS NOT NULL\n")
        elif node.operator == "not":
            assert isinstance(node.right, FilterPath)
            num_table, select, where = \
                _generate_path(table_name, customer_id, version,
                               select, where,
                               node.right,
                               num_table)
            where.append(f"t{num_table}.entity->'$.{node.right.paths[-1]}' IS NULL\n")
        else:
            # Raised explicitly: a bare `assert False` disappears with `-O`.
            raise AssertionError(f"Unsupported unary operator {node.operator!r}")
    elif isinstance(node, FilterBinary):
        if node.operator in ["and", "or"]:
            if _use_inner_join(node):
                for child in (node.left, node.right):
                    if isinstance(child, FilterBinary) and _use_inner_join(child):
                        log.warning("MySQL driver can not implement this request. Result may be invalid")
                if node.operator == "and":
                    # Each side becomes an independent request; the entity
                    # must be found by both of them.
                    num_table += 1
                    num_table, left_sql = _generate_sql_block(table_name, customer_id, version,
                                                              0,
                                                              node.left,
                                                              num_table)
                    num_table += 1
                    num_table, right_sql = _generate_sql_block(table_name, customer_id, version,
                                                               0,
                                                               node.right,
                                                               num_table)
                    num_table += 1
                    # NOTE(review): the sub-requests select whole entities
                    # while the test is on '$.id' - confirm.
                    select = [
                        # Query the requested table (was hard-coded to `haystack`).
                        f"\nSELECT t{num_table}.entity FROM {table_name} as t{num_table}\n",
                        "WHERE entity->'$.id' in (",
                        left_sql,
                        ")\nAND entity->'$.id' in (",
                        right_sql,
                        ")\n",
                    ]
                else:
                    num_table, left_sql = _generate_sql_block(table_name, customer_id, version,
                                                              0,
                                                              node.left,
                                                              num_table)
                    num_table += 1
                    num_table, right_sql = _generate_sql_block(table_name, customer_id, version,
                                                               0,
                                                               node.right,
                                                               num_table)
                    select = [left_sql, "UNION", right_sql]
                # Everything is now in `select`: drop the pending conditions.
                where = []
            else:
                where.append('(')
                parent_left = isinstance(node.left, FilterBinary) and node.left.operator in ["and", "or"]
                num_table, select, where = _generate_filter_in_sql(table_name, customer_id, version,
                                                                   select,
                                                                   where,
                                                                   node.left,
                                                                   num_table)
                if parent_left:
                    # Remove the last character generated by the left block
                    # and start the operator on a new line.
                    where = \
                        ["".join(_flatten(where))[:-1],
                         f"\n{node.operator.upper()} "
                         ]
                else:
                    where.append(f"{node.operator.upper()} ")
                num_table, select, where = _generate_filter_in_sql(table_name, customer_id, version,
                                                                   select,
                                                                   where,
                                                                   node.right,
                                                                   num_table)
            if where:
                where.append(")\n")
        else:
            value = node.right
            if isinstance(value, Quantity):
                value = value.m
            left_path = cast(FilterPath, node.left)
            typed_comparison = node.operator not in ('==', '!=')
            if typed_comparison and not isinstance(value, (int, float, time, date, str)):
                raise AssertionError("Operator not supported for this type")
            num_table, select, where = \
                _generate_path(table_name, customer_id, version,
                               select, where,
                               left_path,
                               num_table)
            column = f"t{num_table}.entity->'$.{left_path.paths[-1]}'"
            if typed_comparison:
                where.append(_typed_comparison_sql(column, node.operator, value))
            elif value is None:
                null_test = "IS NOT NULL" if node.operator == '!=' else "IS NULL"
                where.append(f"{column} {null_test}\n")
            else:
                scalar = str(json.loads(jsondumper.dump_scalar(value)))
                if isinstance(value, Ref):
                    # Prefix match on the reference. `!=` must negate the
                    # match instead of producing the same test as `==`.
                    like = "NOT LIKE" if node.operator == '!=' else "LIKE"
                    where.append(f"{column} {like} '\"{scalar}%\"'\n")
                else:
                    where.append(f"{column} {_map_operator[node.operator]} '{scalar}'\n")
    else:
        raise AssertionError("Invalid node")
    return num_table, select, where
def _flatten(a_list: List[Any]) -> Iterator[Any]:
return itertools.chain.from_iterable(a_list)
def _select_version(version: datetime, num_table: int) -> str:
return f"'{version.isoformat()}' " \
f"BETWEEN t{num_table}.start_datetime " \
f"AND t{num_table}.end_datetime\n"
@dataclass
class _FilterDate(FilterNode):
    """ Filter node selecting the rows of one table alias for a version and a customer.

    It is put at the left of the root `and` of each generated request and
    converted to the version and customer tests of the table `t{num_table}`.
    """
    # The version date the rows must be valid at.
    version: datetime
    # The index N of the table alias `tN` the tests apply to.
    num_table: int
    # The customer id the rows must belong to.
    customer_id: str
def _generate_sql_block(table_name: str,
                        customer_id: str,
                        version: datetime,
                        limit: int,
                        node: FilterNode,
                        num_table: int) -> Tuple[int, str]:
    """ Generate a complete SELECT request for a filter tree.

    The filter is combined with a `_FilterDate` node so that the request only
    returns the rows of the customer at the requested version.

    Args:
        table_name: The table to query.
        customer_id: The customer id.
        version: The version date.
        limit: The maximum number of rows, ignored when not positive.
        node: The root of the filter tree.
        num_table: The index of the table alias used by the request.
    Returns:
        The index of the last table alias used and the SQL request.
    """
    first_table = num_table
    header = f"\nSELECT t{first_table}.entity\nFROM {table_name} as t{first_table}\n"
    rooted_filter = FilterBinary("and", _FilterDate(version, first_table, customer_id), node)
    num_table, select, where = _generate_filter_in_sql(
        table_name, customer_id, version,
        [header],
        [],
        rooted_filter,
        first_table
    )
    parts = ["".join(_flatten(select))]
    if num_table == first_table:
        # The table counter did not move: the conditions need a WHERE
        # keyword in front of them.
        parts.append("WHERE\n")
    parts.append("".join(_flatten(where)))
    if limit > 0:
        parts.append(f"LIMIT {limit}\n")
    return num_table, "".join(parts)
def _sql_filter(table_name: str,
                grid_filter: Optional[str],
                version: datetime,
                limit: int = 0,
                customer_id: str = '') -> str:
    """ Convert a haystack filter to a SQL request.

    The request starts with a SQL comment echoing the filter.

    Args:
        table_name: The table to query.
        grid_filter: The haystack filter.
        version: The version date.
        limit: The maximum number of rows, ignored when not positive.
        customer_id: The customer id.
    Returns:
        The SQL request.
    """
    _, sql = _generate_sql_block(
        table_name,
        customer_id,
        version,
        limit,
        parse_filter(grid_filter).head,
        num_table=1)
    # A `-- ` comment ends at the first line break: fold a multi-line filter
    # onto one line so that its tail can not be read as SQL.
    echoed_filter = " ".join(str(grid_filter).splitlines())
    return f'-- {echoed_filter}{sql}'
def _exec_sql_filter(params: Dict[str, Any],
                     cursor,
                     table_name: str,
                     grid_filter: Optional[str],
                     version: datetime,
                     limit: int = 0,
                     customer_id: Optional[str] = None) -> DBCursor:
    """ Execute the request selecting the entities matching a filter.

    Without filter (None or empty string), the `SELECT_ENTITY` request of
    `params` is executed with the version and the customer id; `limit` is not
    used in that case.

    Args:
        params: The SQL requests of the driver.
        cursor: The database cursor used to execute the request.
        table_name: The table to query.
        grid_filter: The haystack filter, or nothing to select every entity.
        version: The version date.
        limit: The maximum number of rows for a filtered request.
        customer_id: The customer id.
    Returns:
        The cursor, after the execution of the request.
    """
    has_filter = grid_filter is not None and grid_filter != ''
    if has_filter:
        cursor.execute(_sql_filter(table_name, grid_filter, version, limit, customer_id))
    else:
        cursor.execute(params["SELECT_ENTITY"], (version, customer_id))
    return cursor
def get_db_parameters(database_name: str, table_name: str) -> Dict[str, Union[Callable, str]]:
    """ Return the SQL requests and the converters used to manipulate a MySQL database.

    Args:
        database_name: The database name.
        table_name: The table name to use.
    Returns:
        A dictionary with SQL requests or callables
    """
    meta_table = f"{table_name}_meta_datas"
    ts_table = f"{table_name}_ts"

    def _field_to_datetime_tz(val):
        # The value read from the database is tagged as UTC.
        return val.replace(tzinfo=pytz.utc)

    def _datetime_tz_to_field(dt):
        # The time zone is replaced by UTC, the clock fields are kept as is.
        rebuilt = datetime(dt.year, dt.month, dt.day,
                           dt.hour, dt.minute, dt.second, dt.microsecond,
                           tzinfo=dt.tzinfo)
        return rebuilt.replace(tzinfo=pytz.utc).isoformat()

    def _create_index_sql(indexed_table: str) -> str:
        # Create `index_1` on the table only when it does not exist yet.
        return textwrap.dedent(f'''
            select if (
            exists(
            select distinct index_name from information_schema.statistics
            where table_schema = '{database_name}'
            and table_name = '{indexed_table}' and index_name like 'index_1'
            )
            ,'select ''index index_1 exists'' _______;'
            ,'create index index_1 on {indexed_table}(id, customer_id)') into @a;
            PREPARE stmt1 FROM @a;
            EXECUTE stmt1;
            DEALLOCATE PREPARE stmt1;
            ''')

    return {
        "sql_type_to_json": json.loads,
        "exec_sql_filter": _exec_sql_filter,
        "field_to_datetime_tz": _field_to_datetime_tz,
        "datetime_tz_to_field": _datetime_tz_to_field,
        "CREATE_HAYSTACK_TABLE": textwrap.dedent(f'''
            CREATE TABLE IF NOT EXISTS {table_name}
            (
            id VARCHAR(256),
            customer_id VARCHAR(128) NOT NULL,
            start_datetime DATETIME(6) NOT NULL,
            end_datetime DATETIME(6) NOT NULL,
            entity JSON NOT NULL
            );
            '''),
        "CREATE_HAYSTACK_INDEX_1": _create_index_sql(table_name),
        "CREATE_HAYSTACK_INDEX_2": '',
        "CREATE_METADATA_TABLE": textwrap.dedent(f'''
            CREATE TABLE IF NOT EXISTS {meta_table}
            (
            customer_id VARCHAR(256) NOT NULL,
            start_datetime DATETIME(6) NOT NULL,
            end_datetime DATETIME(6) NOT NULL,
            metadata JSON,
            cols JSON
            );
            '''),
        "PURGE_TABLES_HAYSTACK": textwrap.dedent(f'''
            DELETE FROM {table_name} ;
            '''),
        "PURGE_TABLES_HAYSTACK_META": textwrap.dedent(f'''
            DELETE FROM {meta_table} ;
            '''),
        "SELECT_META_DATA": textwrap.dedent(f'''
            SELECT metadata,cols FROM {meta_table}
            WHERE %s BETWEEN start_datetime AND end_datetime
            AND customer_id=%s
            '''),
        "CLOSE_META_DATA": textwrap.dedent(f'''
            UPDATE {meta_table} SET end_datetime=%s
            WHERE %s >= start_datetime AND end_datetime = '9999-12-31T23:59:59'
            AND customer_id=%s
            '''),
        "UPDATE_META_DATA": textwrap.dedent(f'''
            INSERT INTO {meta_table} VALUES (%s,%s,'9999-12-31T23:59:59',%s,%s)
            '''),
        "SELECT_ENTITY": textwrap.dedent(f'''
            SELECT entity FROM {table_name}
            WHERE %s BETWEEN start_datetime AND end_datetime
            AND customer_id = %s
            '''),
        "SELECT_ENTITY_WITH_ID": textwrap.dedent(f'''
            SELECT entity FROM {table_name}
            WHERE %s BETWEEN start_datetime AND end_datetime
            AND customer_id = %s
            AND id IN '''),
        "CLOSE_ENTITY": textwrap.dedent(f'''
            UPDATE {table_name} SET end_datetime=%s
            WHERE %s > start_datetime AND end_datetime = '9999-12-31T23:59:59'
            AND id=%s
            AND customer_id = %s
            '''),
        "INSERT_ENTITY": textwrap.dedent(f'''
            INSERT INTO {table_name} VALUES (%s,%s,%s,'9999-12-31T23:59:59',%s)
            '''),
        "DISTINCT_VERSION": textwrap.dedent(f'''
            SELECT DISTINCT start_datetime
            FROM {table_name}
            WHERE customer_id = %s
            ORDER BY start_datetime
            '''),
        "DISTINCT_TAG_VALUES": textwrap.dedent(f'''
            SELECT DISTINCT json_extract(entity,'$.[#]')
            FROM {table_name}
            WHERE customer_id = %s
            '''),
        "CREATE_TS_TABLE": textwrap.dedent(f'''
            CREATE TABLE IF NOT EXISTS {ts_table}
            (
            id VARCHAR(256) NOT NULL,
            customer_id VARCHAR(128) NOT NULL,
            date_time DATETIME(6) NOT NULL,
            val JSON NOT NULL
            );
            '''),
        "CREATE_TS_INDEX": _create_index_sql(ts_table),
        "CLEAN_TS": textwrap.dedent(f'''
            DELETE FROM {ts_table}
            WHERE customer_id = %s
            AND id = %s
            AND date_time BETWEEN %s AND %s
            '''),
        "INSERT_TS": textwrap.dedent(f'''
            INSERT INTO {ts_table}
            VALUES(%s,%s,%s,%s)
            '''),
        "SELECT_TS": textwrap.dedent(f'''
            SELECT date_time,val FROM {ts_table}
            WHERE customer_id = %s
            AND id = %s
            AND date_time BETWEEN %s AND %s
            ORDER BY date_time
            '''),
        "PURGE_TABLES_TS": textwrap.dedent(f'''
            DELETE FROM {ts_table}
            '''),
    }
Functions
def get_db_parameters(database_name: str, table_name: str) ‑> Dict[str, Union[Callable, str]]
-
Return the SQL requests and some lambdas to manipulate a MySQL database.
Args
database_name
- The database name.
table_name
- The table name to use.
Returns
A dictionary with SQL requests or lambdas
Expand source code
def get_db_parameters(database_name: str, table_name: str) -> Dict[str, Union[Callable, str]]:
    """ Return the SQL requests and the converters used to manipulate a MySQL database.

    Args:
        database_name: The database name.
        table_name: The table name to use.
    Returns:
        A dictionary with SQL requests or callables
    """
    meta_table = f"{table_name}_meta_datas"
    ts_table = f"{table_name}_ts"

    def _field_to_datetime_tz(val):
        # The value read from the database is tagged as UTC.
        return val.replace(tzinfo=pytz.utc)

    def _datetime_tz_to_field(dt):
        # The time zone is replaced by UTC, the clock fields are kept as is.
        rebuilt = datetime(dt.year, dt.month, dt.day,
                           dt.hour, dt.minute, dt.second, dt.microsecond,
                           tzinfo=dt.tzinfo)
        return rebuilt.replace(tzinfo=pytz.utc).isoformat()

    def _create_index_sql(indexed_table: str) -> str:
        # Create `index_1` on the table only when it does not exist yet.
        return textwrap.dedent(f'''
            select if (
            exists(
            select distinct index_name from information_schema.statistics
            where table_schema = '{database_name}'
            and table_name = '{indexed_table}' and index_name like 'index_1'
            )
            ,'select ''index index_1 exists'' _______;'
            ,'create index index_1 on {indexed_table}(id, customer_id)') into @a;
            PREPARE stmt1 FROM @a;
            EXECUTE stmt1;
            DEALLOCATE PREPARE stmt1;
            ''')

    return {
        "sql_type_to_json": json.loads,
        "exec_sql_filter": _exec_sql_filter,
        "field_to_datetime_tz": _field_to_datetime_tz,
        "datetime_tz_to_field": _datetime_tz_to_field,
        "CREATE_HAYSTACK_TABLE": textwrap.dedent(f'''
            CREATE TABLE IF NOT EXISTS {table_name}
            (
            id VARCHAR(256),
            customer_id VARCHAR(128) NOT NULL,
            start_datetime DATETIME(6) NOT NULL,
            end_datetime DATETIME(6) NOT NULL,
            entity JSON NOT NULL
            );
            '''),
        "CREATE_HAYSTACK_INDEX_1": _create_index_sql(table_name),
        "CREATE_HAYSTACK_INDEX_2": '',
        "CREATE_METADATA_TABLE": textwrap.dedent(f'''
            CREATE TABLE IF NOT EXISTS {meta_table}
            (
            customer_id VARCHAR(256) NOT NULL,
            start_datetime DATETIME(6) NOT NULL,
            end_datetime DATETIME(6) NOT NULL,
            metadata JSON,
            cols JSON
            );
            '''),
        "PURGE_TABLES_HAYSTACK": textwrap.dedent(f'''
            DELETE FROM {table_name} ;
            '''),
        "PURGE_TABLES_HAYSTACK_META": textwrap.dedent(f'''
            DELETE FROM {meta_table} ;
            '''),
        "SELECT_META_DATA": textwrap.dedent(f'''
            SELECT metadata,cols FROM {meta_table}
            WHERE %s BETWEEN start_datetime AND end_datetime
            AND customer_id=%s
            '''),
        "CLOSE_META_DATA": textwrap.dedent(f'''
            UPDATE {meta_table} SET end_datetime=%s
            WHERE %s >= start_datetime AND end_datetime = '9999-12-31T23:59:59'
            AND customer_id=%s
            '''),
        "UPDATE_META_DATA": textwrap.dedent(f'''
            INSERT INTO {meta_table} VALUES (%s,%s,'9999-12-31T23:59:59',%s,%s)
            '''),
        "SELECT_ENTITY": textwrap.dedent(f'''
            SELECT entity FROM {table_name}
            WHERE %s BETWEEN start_datetime AND end_datetime
            AND customer_id = %s
            '''),
        "SELECT_ENTITY_WITH_ID": textwrap.dedent(f'''
            SELECT entity FROM {table_name}
            WHERE %s BETWEEN start_datetime AND end_datetime
            AND customer_id = %s
            AND id IN '''),
        "CLOSE_ENTITY": textwrap.dedent(f'''
            UPDATE {table_name} SET end_datetime=%s
            WHERE %s > start_datetime AND end_datetime = '9999-12-31T23:59:59'
            AND id=%s
            AND customer_id = %s
            '''),
        "INSERT_ENTITY": textwrap.dedent(f'''
            INSERT INTO {table_name} VALUES (%s,%s,%s,'9999-12-31T23:59:59',%s)
            '''),
        "DISTINCT_VERSION": textwrap.dedent(f'''
            SELECT DISTINCT start_datetime
            FROM {table_name}
            WHERE customer_id = %s
            ORDER BY start_datetime
            '''),
        "DISTINCT_TAG_VALUES": textwrap.dedent(f'''
            SELECT DISTINCT json_extract(entity,'$.[#]')
            FROM {table_name}
            WHERE customer_id = %s
            '''),
        "CREATE_TS_TABLE": textwrap.dedent(f'''
            CREATE TABLE IF NOT EXISTS {ts_table}
            (
            id VARCHAR(256) NOT NULL,
            customer_id VARCHAR(128) NOT NULL,
            date_time DATETIME(6) NOT NULL,
            val JSON NOT NULL
            );
            '''),
        "CREATE_TS_INDEX": _create_index_sql(ts_table),
        "CLEAN_TS": textwrap.dedent(f'''
            DELETE FROM {ts_table}
            WHERE customer_id = %s
            AND id = %s
            AND date_time BETWEEN %s AND %s
            '''),
        "INSERT_TS": textwrap.dedent(f'''
            INSERT INTO {ts_table}
            VALUES(%s,%s,%s,%s)
            '''),
        "SELECT_TS": textwrap.dedent(f'''
            SELECT date_time,val FROM {ts_table}
            WHERE customer_id = %s
            AND id = %s
            AND date_time BETWEEN %s AND %s
            ORDER BY date_time
            '''),
        "PURGE_TABLES_TS": textwrap.dedent(f'''
            DELETE FROM {ts_table}
            '''),
    }