# Licensed Materials - Property of IBM
# 5737-M66, 5900-AAA, 5900-AMG
# (C) Copyright IBM Corp. 2019, 2025 All Rights Reserved.
# US Government Users Restricted Rights - Use, duplication, or disclosure
# restricted by GSA ADP Schedule Contract with IBM Corp.

import math
import random

import numpy as np
import pandas as pd
import pytest

from iotfunctions.metadata import Granularity

from .. import api
from ..persist import PersistColumns
from ..util import current_directory, log_df_info


@pytest.fixture
def db():
    """Provide the database handle for the tests, obtained via ``api._get_db`` with echo turned off."""
    handle = api._get_db(echo=False)
    return handle


@pytest.fixture
def db_schema():
    """Provide the schema argument handed to every database call in these tests (``None`` here)."""
    schema = None
    return schema


def test_grain_table_creation(db, db_schema):
    """Write small frames through ``PersistColumns`` for three grain setups and check what lands in the table.

    Covered setups:
      * base KPI table (``target_grain=None``),
      * entity-level 'Daily' grain (grain tuple ``('D', None, True)``),
      * group-level 'Daily' grain (grain tuple ``('D', None, False)``), written without an ``entity_id`` index.

    Every table name generated by the test is registered for cleanup as soon as it is known, so a failure in
    an early section cannot trigger a ``NameError`` in the cleanup code and hide the real error.
    """
    created_tables = []

    def fresh_table(name_pattern):
        # Generate a random table name, remember it for cleanup and make sure no such table exists yet.
        table_name = name_pattern % math.floor(random.random() * 10**5)
        created_tables.append(table_name)
        if db.if_exists(table_name=table_name, schema=db_schema):
            db.drop_table(table_name=table_name, schema=db_schema)
        assert not db.if_exists(table_name=table_name, schema=db_schema)
        return table_name

    def read_back(table_name):
        # Load the complete table content and log a short summary of it.
        query = 'select * from %s' % table_name
        frame = pd.read_sql(sql=query, con=db.connection, parse_dates=['timestamp', 'value_t'])
        print('df_read=%s' % log_df_info(frame, head=5))
        return frame

    def entity_level_frame():
        # Three entities, one timestamp each, indexed by (entity_id, timestamp).
        frame = pd.DataFrame(
            columns=['entity_id', 'timestamp', 'k1'],
            data=[
                ['d1', '2019-01-01', 1],
                ['d2', '2019-01-01', 2],
                ['d3', '2019-01-01', 3],
            ]
        )
        frame = frame.astype({'timestamp': 'datetime64[ms]'})
        return frame.set_index(['entity_id', 'timestamp'])

    try:
        # base KPI table, 'None' grain

        base_grain_table = fresh_table('dm_abcd_%05d')

        writer = PersistColumns(
            target_grain=None,
            target_grain_tuple=None,
            target_table=base_grain_table,
            db=db,
            db_schema=db_schema)
        writer.execute(entity_level_frame())

        df_read = read_back(base_grain_table)
        assert len(df_read) == 3
        assert len(df_read.columns) == 8

        # 'Daily' grain (only this grain would be forcefully created by PersistColumn)

        daily_grain_table = fresh_table('dm_abcd_daily_%05d')

        writer = PersistColumns(
            target_grain='Daily',
            target_grain_tuple=('D', None, True),
            target_table=daily_grain_table,
            db=db,
            db_schema=db_schema)
        writer.execute(entity_level_frame())

        df_read = read_back(daily_grain_table)
        assert len(df_read) == 3
        assert len(df_read.columns) == 8

        # 'Daily' grain on group level: the frame is indexed by timestamp only

        group_daily_grain_table = fresh_table('dm_abcd_Daily_%05d')

        writer = PersistColumns(
            target_grain='Daily',
            target_grain_tuple=('D', None, False),
            target_table=group_daily_grain_table,
            db=db,
            db_schema=db_schema)

        df = pd.DataFrame(
            columns=['timestamp', 'k1'],
            data=[
                ['2019-01-01', 1],
                ['2019-01-02', 2],
                ['2019-01-03', 3],
            ]
        )
        df = df.astype({'timestamp': 'datetime64[ms]'})
        df = df.set_index(['timestamp'])
        writer.execute(df)

        df_read = read_back(group_daily_grain_table)
        assert len(df_read) == 3
        # one column fewer than the entity-level tables -- presumably the missing entity id column (TODO confirm)
        assert len(df_read.columns) == 7
    finally:
        # only names that were actually generated are dropped, in creation order
        for table_name in created_tables:
            db.drop_table(table_name=table_name, schema=db_schema)


def test_read_daily_grain_table(db, db_schema):
    """Persist a daily-grain table with four data items and read it back through ``api._get_derived_data``.

    The reads exercise the ``data_items``, ``entities`` and ``start_ts``/``end_ts`` filters and check the
    number of returned rows as well as exactly which of the data items ``k1``..``k4`` show up as columns.
    """
    all_items = ('k1', 'k2', 'k3', 'k4')
    # generated outside of the try block so that the cleanup below can always refer to it
    daily_grain_table = 'dm_abcd_daily_%05d' % math.floor(random.random() * 10**5)

    def read(start_ts=None, end_ts=None, entities=None, data_items=None):
        # Query the table with the given filters and log a short summary of the result.
        frame = api._get_derived_data(db, table_name=daily_grain_table, schema=db_schema, index_columns=['entity_id', 'timestamp'], start_ts=start_ts, end_ts=end_ts, entities=entities, data_items=data_items)
        print('df_read=%s' % log_df_info(frame, head=5))
        return frame

    def check(frame, row_count, present):
        # The frame must have `row_count` rows and exactly the data items in `present` as columns.
        assert len(frame) == row_count
        assert len(frame.columns) == len(present)
        for item in all_items:
            if item in present:
                assert item in frame.columns
            else:
                assert item not in frame.columns

    try:
        if db.if_exists(table_name=daily_grain_table, schema=db_schema):
            db.drop_table(table_name=daily_grain_table, schema=db_schema)

        assert not db.if_exists(table_name=daily_grain_table, schema=db_schema)

        writer = PersistColumns(
            target_grain='Daily',
            target_grain_tuple=('D', None, True),
            target_table=daily_grain_table,
            db=db,
            db_schema=db_schema)

        df = pd.DataFrame(
            columns=['entity_id', 'timestamp', 'k1', 'k2', 'k3', 'k4'],
            data=[
                ['d1', '2019-01-01', 1, True,  '1', None],
                ['d1', '2019-01-02', 2, False, '2', None],
                ['d1', '2019-01-03', 3, False, '3', None],
                ['d1', '2019-01-04', 4, False, '4', None],
                ['d1', '2019-01-05', 5, False, '5', None],
                ['d2', '2019-01-01', 6, None,  '6', None],
                ['d2', '2019-01-02', 7, None,  '7', None],
                ['d3', '2019-01-01', 8, None,  '8', pd.Timestamp('2019-01-01T11:00:00')],
            ]
        )
        df = df.astype({'timestamp': 'datetime64[ms]', 'k2': bool})
        df = df.set_index(['entity_id', 'timestamp'])

        writer.execute(df)

        # normal query without any condition
        check(read(), 8, all_items)

        # queries with only 1 data item
        check(read(data_items=['k1']), 8, ['k1'])
        check(read(data_items=['k2']), 8, ['k2'])
        # k4 has a value in a single input row only, and only that row is expected back
        check(read(data_items=['k4']), 1, ['k4'])

        # query with 2 data items (any)
        check(read(data_items=['k2', 'k4']), 8, ['k2', 'k4'])

        # query with 2 data items (any) and filtering only one entity
        check(read(entities=['d1'], data_items=['k2', 'k4']), 5, ['k2', 'k4'])

        # query with 2 data items (any) and filtering only one entity, additionally with time range filtering
        check(read(start_ts='2019-01-02', end_ts='2019-01-05', entities=['d1'], data_items=['k2', 'k4']), 3, ['k2', 'k4'])

        # query with 2 data items (any) and filtering two entities, additionally with time range filtering
        check(read(start_ts='2019-01-01', end_ts='2019-01-05', entities=['d1', 'd2'], data_items=['k2', 'k4']), 6, ['k2', 'k4'])

        # same as before, but with a different pair of entities
        check(read(start_ts='2019-01-01', end_ts='2019-01-05', entities=['d1', 'd3'], data_items=['k2', 'k4']), 5, ['k2', 'k4'])
    finally:
        db.drop_table(table_name=daily_grain_table, schema=db_schema)


def test_read_derived_data_validation(db, db_schema):
    """Check the argument handling of ``api._get_derived_data``.

    * ``index_columns`` being ``None`` or empty must raise ``ValueError``.
    * An empty ``entities`` list and an empty ``data_items`` list must behave like ``None`` (no filtering).
    """
    all_items = ('k1', 'k2', 'k3', 'k4')
    # generated outside of the try block so that the cleanup below can always refer to it
    daily_grain_table = 'dm_abcd_daily_%05d' % math.floor(random.random() * 10**5)

    def check_unfiltered(frame):
        # All 8 rows and all 4 data items are expected when no filter is effective.
        print('df_read=%s' % log_df_info(frame, head=5))
        assert len(frame) == 8
        assert len(frame.columns) == 4
        for item in all_items:
            assert item in frame.columns

    try:
        if db.if_exists(table_name=daily_grain_table, schema=db_schema):
            db.drop_table(table_name=daily_grain_table, schema=db_schema)

        assert not db.if_exists(table_name=daily_grain_table, schema=db_schema)

        writer = PersistColumns(
            target_grain='Daily',
            target_grain_tuple=('D', None, True),
            target_table=daily_grain_table,
            db=db,
            db_schema=db_schema)

        df = pd.DataFrame(
            columns=['entity_id', 'timestamp', 'k1', 'k2', 'k3', 'k4'],
            data=[
                ['d1', '2019-01-01', 1, True,  '1', None],
                ['d1', '2019-01-02', 2, False, '2', None],
                ['d1', '2019-01-03', 3, False, '3', None],
                ['d1', '2019-01-04', 4, False, '4', None],
                ['d1', '2019-01-05', 5, False, '5', None],
                ['d2', '2019-01-01', 6, None,  '6', None],
                ['d2', '2019-01-02', 7, None,  '7', None],
                ['d3', '2019-01-01', 8, None,  '8', pd.Timestamp('2019-01-01T11:00:00')],
            ]
        )
        df = df.astype({'timestamp': 'datetime64[ms]', 'k2': bool})
        df = df.set_index(['entity_id', 'timestamp'])

        writer.execute(df)

        # None or empty index_columns
        with pytest.raises(ValueError):
            api._get_derived_data(db, table_name=daily_grain_table, schema=db_schema, index_columns=None)
        with pytest.raises(ValueError):
            api._get_derived_data(db, table_name=daily_grain_table, schema=db_schema, index_columns=[])

        # an empty entities list is treated as None
        df_read = api._get_derived_data(db, table_name=daily_grain_table, schema=db_schema, index_columns=['entity_id', 'timestamp'], start_ts=None, end_ts=None, entities=[], data_items=None)
        check_unfiltered(df_read)

        # an empty data_items list is treated as None
        df_read = api._get_derived_data(db, table_name=daily_grain_table, schema=db_schema, index_columns=['entity_id', 'timestamp'], start_ts=None, end_ts=None, entities=None, data_items=[])
        check_unfiltered(df_read)
    finally:
        db.drop_table(table_name=daily_grain_table, schema=db_schema)


def test_get_derived_data(db, db_schema):
    """End-to-end check of ``api.get_entity_type_data`` with raw, derived and aggregated data items.

    The test sets up an entity type from the first 10 rows of a CSV file, persists one non-aggregated
    derived table and four aggregated tables (entity-level and group-level, daily and hourly), mocks the
    matching data item metadata and then verifies

      * which data items are returned for various ``data_items`` selections,
      * which combinations are rejected with ``ValueError``,
      * resampling through ``time_grain``.

    All tables created here, including the hourly ones, are dropped again in the cleanup section.
    """
    df_data_sensor = pd.read_csv('%s/trainbrake_device_data.csv' % current_directory(file=__file__), parse_dates=['RCV_TIMESTAMP_UTC'])[:10]
    df_data_sensor['DEVICEID'] = np.where(df_data_sensor['DEVICEID'] == 'TrainBrake_1', 'abcd-1', 'abcd-2')
    # mixed-case raw data item (but we need to play some trick later to actually make the data item metadata
    # created by setup_iot_type mixed-case, since that method makes everything lower-case)
    df_data_sensor['MixedCaseAxlevibration'] = df_data_sensor['TRAINBRAKESIMULATION_AXLEVIBRATION']
    print(log_df_info(df_data_sensor, head=5))

    iot_entity_type = 'abcd%05d' % math.floor(random.random() * 10**5)
    iot_entity_id = 'deviceid'
    iot_timestamp = 'rcv_timestamp_utc'
    base_dm_table = 'dm_%s' % iot_entity_type.lower()
    base_dm_entity_id = 'entity_id'
    base_dm_timestamp = 'timestamp'

    # every data item name the assertions below care about
    known_items = (
        'axlevibration',
        'MixedCaseAxlevibration',
        'axlevibrationplusone',
        'MixedCaseAxlevibrationPlusOne',
        'daily_axlevibration',
        'group_daily_axlevibration',
        'hourly_axlevibration',
        'group_hourly_axlevibration',
    )

    def assert_columns(frame, present):
        # Of all known data items exactly those listed in `present` must be columns of `frame`.
        for item in known_items:
            if item in present:
                assert item in frame.columns
            else:
                assert item not in frame.columns

    # aggregated tables to drop at the end; filled once the granularities exist so that the cleanup never
    # refers to a name that has not been assigned yet
    aggregate_tables = []

    try:
        # setup raw data

        # note even though there are mixed-case data item names used, this method actually makes everything
        # lower case, so there's an 'axlevibration' data item, not 'Axlevibration'
        api.setup_iot_type(iot_entity_type, df_data_sensor, columns=['TRAINBRAKESIMULATION_AXLEVIBRATION', 'MixedCaseAxlevibration'], deviceid_column='DEVICEID', timestamp_column='RCV_TIMESTAMP_UTC', timestamp_in_payload=False, parse_dates=None, rename_columns={'TRAINBRAKESIMULATION_AXLEVIBRATION': 'Axlevibration'}, write='deletefirst', use_wiotp=False, import_only=False, db=db, db_schema=db_schema)

        # setup derived data

        grain_daily = Granularity(
            name='Daily', dimensions=None, timestamp=iot_timestamp, freq='D', entity_name=iot_entity_type, entity_id=iot_entity_id)
        grain_Daily = Granularity(
            name='Daily', dimensions=None, timestamp=iot_timestamp, freq='D', entity_name=iot_entity_type, entity_id=None)
        grain_hourly = Granularity(
            name='Hourly', dimensions=None, timestamp=iot_timestamp, freq='H', entity_name=iot_entity_type, entity_id=iot_entity_id)
        grain_grouphourly = Granularity(
            name='GroupHourly', dimensions=None, timestamp=iot_timestamp, freq='H', entity_name=iot_entity_type, entity_id=None)
        granularities = [grain_daily, grain_Daily, grain_hourly, grain_grouphourly]
        aggregate_tables.extend([
            grain_Daily.table_name,
            grain_daily.table_name,
            grain_hourly.table_name,
            grain_grouphourly.table_name,
        ])

        derived_data_items = [
            {
                'name': 'axlevibrationplusone',
                'type': 'DERIVED_METRIC',
                'columnName': 'axlevibrationplusone',
                'columnType': 'NUMBER',
                'sourceTableName': base_dm_table,
                'parentDataItemName': 'Axlevibration',
                'kpiFunctionDto': {'granularity': None},
            },
            {
                'name': 'MixedCaseAxlevibrationPlusOne',
                'type': 'DERIVED_METRIC',
                'columnName': 'mixedcaseaxlevibrationplusone',
                'columnType': 'NUMBER',
                'sourceTableName': base_dm_table,
                'parentDataItemName': 'Axlevibration',
                'kpiFunctionDto': {'granularity': None},
            },
            {
                'name': 'daily_axlevibration',
                'type': 'DERIVED_METRIC',
                'columnName': 'daily_axlevibration',
                'columnType': 'NUMBER',
                'sourceTableName': grain_daily.table_name,
                'parentDataItemName': 'Axlevibration',
                'kpiFunctionDto': {'granularity': grain_daily.name},
            },
            {
                'name': 'group_daily_axlevibration',
                'type': 'DERIVED_METRIC',
                'columnName': 'group_daily_axlevibration',
                'columnType': 'NUMBER',
                'sourceTableName': grain_Daily.table_name,
                'parentDataItemName': 'Axlevibration',
                'kpiFunctionDto': {'granularity': grain_Daily.name},
            },
            {
                'name': 'hourly_axlevibration',
                'type': 'DERIVED_METRIC',
                'columnName': 'hourly_axlevibration',
                'columnType': 'NUMBER',
                'sourceTableName': grain_hourly.table_name,
                'parentDataItemName': 'Axlevibration',
                'kpiFunctionDto': {'granularity': grain_hourly.name},
            },
            {
                'name': 'group_hourly_axlevibration',
                'type': 'DERIVED_METRIC',
                'columnName': 'group_hourly_axlevibration',
                'columnType': 'NUMBER',
                'sourceTableName': grain_grouphourly.table_name,
                'parentDataItemName': 'Axlevibration',
                'kpiFunctionDto': {'granularity': grain_grouphourly.name},
            },
        ]

        # NOTE(review): the selections below use lower-case column names although the CSV was read with
        # upper-case headers -- presumably setup_iot_type renamed the columns of df_data_sensor in place; confirm.

        # non-aggregated derived data, stored in the base table
        df = df_data_sensor[[iot_entity_id, iot_timestamp, 'trainbrakesimulation_axlevibration']].rename(columns={iot_entity_id: base_dm_entity_id, iot_timestamp: base_dm_timestamp})
        df['axlevibrationplusone'] = df['trainbrakesimulation_axlevibration'] + 1
        df['MixedCaseAxlevibrationPlusOne'] = df['trainbrakesimulation_axlevibration'] + 1
        df = df.drop(columns=['trainbrakesimulation_axlevibration'])
        df = df.set_index([base_dm_entity_id, base_dm_timestamp])
        PersistColumns(
            target_grain=None,
            target_grain_tuple=('', None, True), # has to be not None in order to get timestamp included as key
            target_table='dm_%s' % iot_entity_type.lower(),
            db=db,
            db_schema=db_schema).execute(df, force_create=True)

        # entity-level daily mean
        df = df_data_sensor[[iot_entity_id, iot_timestamp, 'trainbrakesimulation_axlevibration']].rename(columns={iot_entity_id: base_dm_entity_id, iot_timestamp: base_dm_timestamp})
        df = df.groupby([pd.Grouper(key=base_dm_entity_id), pd.Grouper(key=base_dm_timestamp, freq=grain_daily.freq)]).mean()
        df = df.rename(columns={'trainbrakesimulation_axlevibration': 'daily_axlevibration'})
        PersistColumns(
            target_grain=grain_daily.name,
            target_grain_tuple=(grain_daily.freq, None, True),
            target_table=grain_daily.table_name,
            db=db,
            db_schema=db_schema).execute(df, force_create=True)

        # group-level daily mean
        df = df_data_sensor[[iot_entity_id, iot_timestamp, 'trainbrakesimulation_axlevibration']].rename(columns={iot_entity_id: base_dm_entity_id, iot_timestamp: base_dm_timestamp})
        df = df.groupby([pd.Grouper(key=base_dm_timestamp, freq=grain_Daily.freq)]).mean()
        df = df.rename(columns={'trainbrakesimulation_axlevibration': 'group_daily_axlevibration'})
        PersistColumns(
            target_grain=grain_Daily.name,
            target_grain_tuple=(grain_Daily.freq, None, False),
            target_table=grain_Daily.table_name,
            db=db,
            db_schema=db_schema).execute(df, force_create=True)

        # entity-level hourly mean
        df = df_data_sensor[[iot_entity_id, iot_timestamp, 'trainbrakesimulation_axlevibration']].rename(columns={iot_entity_id: base_dm_entity_id, iot_timestamp: base_dm_timestamp})
        df = df.groupby([pd.Grouper(key=base_dm_entity_id), pd.Grouper(key=base_dm_timestamp, freq=grain_hourly.freq)]).mean()
        df = df.rename(columns={'trainbrakesimulation_axlevibration': 'hourly_axlevibration'})
        PersistColumns(
            target_grain=grain_hourly.name,
            target_grain_tuple=(grain_hourly.freq, None, True),
            target_table=grain_hourly.table_name,
            db=db,
            db_schema=db_schema).execute(df, force_create=True)

        # group-level hourly mean
        df = df_data_sensor[[iot_entity_id, iot_timestamp, 'trainbrakesimulation_axlevibration']].rename(columns={iot_entity_id: base_dm_entity_id, iot_timestamp: base_dm_timestamp})
        df = df.groupby([pd.Grouper(key=base_dm_timestamp, freq=grain_grouphourly.freq)]).mean()
        df = df.rename(columns={'trainbrakesimulation_axlevibration': 'group_hourly_axlevibration'})
        PersistColumns(
            target_grain=grain_grouphourly.name,
            target_grain_tuple=(grain_grouphourly.freq, None, False),
            target_table=grain_grouphourly.table_name,
            db=db,
            db_schema=db_schema).execute(df, force_create=True)

        # mock derived data metadata
        entity_type = api.get_entity_type(iot_entity_type, db, *granularities)
        for dt in entity_type._data_items:
            # special trick here to actually make the mixed-case raw data item metadata mixed-case
            if dt['name'].lower() == 'mixedcaseaxlevibration':
                dt['name'] = 'MixedCaseAxlevibration'
        print(entity_type._data_items)
        entity_type._data_items.extend(derived_data_items)
        print(entity_type._data_items)

        # get all data items
        df = api.get_entity_type_data(entity_type)
        print(log_df_info(df, head=-1))
        # mixed-case raw and derived items should be loaded as-is; loading aggregated derived data merged
        # with raw or base level data is not supported yet
        assert_columns(df, ['axlevibration', 'MixedCaseAxlevibration', 'axlevibrationplusone', 'MixedCaseAxlevibrationPlusOne'])

        # get filtered data items

        # time-series raw data
        df = api.get_entity_type_data(entity_type, data_items=['axlevibration', 'MixedCaseAxlevibration'])
        print(log_df_info(df, head=-1))
        assert_columns(df, ['axlevibration', 'MixedCaseAxlevibration'])

        # time-series raw data, the special case that users use a non-all-lower-case name to retrieve data items
        df = api.get_entity_type_data(entity_type, data_items=['Axlevibration'])
        print(log_df_info(df, head=-1))
        assert 'Axlevibration' in df.columns

        # time-series non-aggregated derived data
        df = api.get_entity_type_data(entity_type, data_items=['axlevibrationplusone', 'MixedCaseAxlevibrationPlusOne'])
        print(log_df_info(df, head=-1))
        assert_columns(df, ['axlevibrationplusone', 'MixedCaseAxlevibrationPlusOne'])

        # time-series raw and non-aggregated derived data can be merged
        df = api.get_entity_type_data(entity_type, data_items=['axlevibration', 'axlevibrationplusone', 'MixedCaseAxlevibrationPlusOne'])
        print(log_df_info(df, head=-1))
        assert_columns(df, ['axlevibration', 'axlevibrationplusone', 'MixedCaseAxlevibrationPlusOne'])

        # time-series aggregated derived data also works, as long as it is not loaded together with other kinds
        df = api.get_entity_type_data(entity_type, data_items=['daily_axlevibration'])
        print(log_df_info(df, head=-1))
        assert_columns(df, ['daily_axlevibration'])

        # time-series aggregated derived data also works, as long as it is not loaded together with other kinds
        # even multiple grains work together
        df = api.get_entity_type_data(entity_type, data_items=['daily_axlevibration', 'hourly_axlevibration'])
        print(log_df_info(df, head=-1))
        assert_columns(df, ['daily_axlevibration', 'hourly_axlevibration'])

        # time-series aggregated derived data cannot be loaded together with time-series raw data
        with pytest.raises(ValueError):
            api.get_entity_type_data(entity_type, data_items=['axlevibration', 'daily_axlevibration'])

        # time-series aggregated derived data cannot be loaded together with time-series non-aggregated derived data
        with pytest.raises(ValueError):
            api.get_entity_type_data(entity_type, data_items=['axlevibrationplusone', 'daily_axlevibration'])

        # time series group-wise derived data is not allowed
        with pytest.raises(ValueError):
            api.get_entity_type_data(entity_type, data_items=['group_daily_axlevibration'])

        # time series group-wise derived data is not allowed, also not in combination with other items
        with pytest.raises(ValueError):
            api.get_entity_type_data(entity_type, data_items=['axlevibrationplusone', 'group_daily_axlevibration'])

        # explicitly using any unsupported combination causes an error to be raised
        with pytest.raises(ValueError):
            api.get_entity_type_data(entity_type, data_items=['axlevibration', 'axlevibrationplusone', 'daily_axlevibration', 'group_daily_axlevibration'])

        # invalid data_item requested
        with pytest.raises(ValueError):
            api.get_entity_type_data(entity_type, data_items=['abc'])
        with pytest.raises(ValueError):
            api.get_entity_type_data(entity_type, data_items=['abc', 'axlevibration'])

        # resampling

        # test resampling which works with derived data also
        df = api.get_entity_type_data(entity_type, data_items=['axlevibration', 'axlevibrationplusone'], time_grain='D')
        print(log_df_info(df[['axlevibration', 'axlevibrationplusone']], head=-1))
        assert 'axlevibration' in df.columns
        assert 'axlevibrationplusone' in df.columns
        assert df.iloc[0]['axlevibration'] + 1 == df.iloc[0]['axlevibrationplusone']

        # test resampling which works with derived data also
        df = api.get_entity_type_data(entity_type, data_items=['axlevibration', 'axlevibrationplusone'], time_grain='H')
        print(log_df_info(df[['axlevibration', 'axlevibrationplusone']], head=-1))
        assert 'axlevibration' in df.columns
        assert 'axlevibrationplusone' in df.columns
        assert df.iloc[0]['axlevibration'] + 1 == df.iloc[0]['axlevibrationplusone']

        # applying resampling on loaded aggregated data (alone) also works
        df = api.get_entity_type_data(entity_type, data_items=['hourly_axlevibration'], time_grain='D')
        print(log_df_info(df[['hourly_axlevibration']], head=-1))
        assert 'hourly_axlevibration' in df.columns

        # raw data resampling does not work with aggregated derived data
        with pytest.raises(ValueError):
            api.get_entity_type_data(entity_type, data_items=['axlevibration', 'daily_axlevibration'], time_grain='D')
    finally:
        # aggregated tables first (daily ones in the established order, then the hourly ones), then the
        # base table, and finally the entity type itself
        for table_name in aggregate_tables:
            db.drop_table(table_name=table_name, schema=db_schema)
        db.drop_table(table_name=base_dm_table, schema=db_schema)
        api.delete_iot_type(iot_entity_type, use_wiotp=False, db=db, db_schema=db_schema)

