# Licensed Materials - Property of IBM
# 5737-M66, 5900-AAA, 5900-AMG
# (C) Copyright IBM Corp. 2019, 2025 All Rights Reserved.
# US Government Users Restricted Rights - Use, duplication, or disclosure
# restricted by GSA ADP Schedule Contract with IBM Corp.

import math
import random

import numpy as np
import pandas as pd
import pytest
from sqlalchemy import func, select

from pandas.tseries.frequencies import to_offset
from pandas.tseries.offsets import DateOffset

from sklearn.decomposition import PCA, FastICA, KernelPCA, NMF, SparsePCA, TruncatedSVD
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier
from sklearn.feature_selection import SelectKBest, VarianceThreshold
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import MinMaxScaler, Normalizer, StandardScaler
from sklearn.tree import DecisionTreeClassifier

from srom.utils.no_op import NoOp
from srom.feature_engineering.model_based_feature_generator import ModelbasedFeatureGenerator

from .. import api
from ..failure_prediction import FailurePredictionAssetGroupPipeline
from ..util import current_directory, log_df_info


@pytest.fixture(scope='module')
def asset_group_id():
    """Return the fixed asset group id used by the tests in this module."""
    group_id = 'abcd'
    return group_id


@pytest.fixture
def iot_type():
    """Return a device type name ending in a random, zero-padded five-digit number.

    The fixture is function-scoped, so a new name is generated for every test
    that requests it.
    """
    suffix = math.floor(random.random() * 10**5)
    return 'abcdsensor_%05d' % suffix


def test_failure_probability_summary_config(asset_group_id, iot_type):
    """Check the default post-processing summary derived from the prediction window.

    A '5d' prediction window is expected to yield daily summaries and a '5h'
    prediction window hourly ones; in both cases a "Maximum" entry and a
    group-level "Mean" entry are expected, both reading from the failure
    probability prediction output.
    """

    def build_group(prediction_window):
        # A fresh model_pipeline dict (and fresh lists) is created for every pipeline.
        return FailurePredictionAssetGroupPipeline(
            asset_group_id=asset_group_id,
            model_pipeline={
                'features': ['%s:%s' % (iot_type, name) for name in ('axlevibration', 'axlemomentum')],
                'features_for_training': [':faildate'],
                'predictions': ['failure_probability', 'rca_path'],
                'aggregation_methods': ['mean', 'max', 'min', 'std'],
                'prediction_window_size': prediction_window,
            },
        )

    def summary_entry(function_name, granularity, output_name, source):
        # Shape of one expected post-processing entry.
        return {
            "functionName": function_name,
            "enabled": True,
            "granularity": granularity,
            "output": {"name": output_name},
            "input": {"source": source},
        }

    # default summary for a daily prediction window
    # NOTE(review): the group-level mean is expected with granularity "Daily" here,
    # while the hourly case below expects "GroupHourly" -- confirm this asymmetry
    # is intended by the implementation.
    daily_group = build_group('5d')
    expected_daily = [
        summary_entry("Maximum", "Daily", "daily_failure_probability_5d", "failure_probability_5d"),
        summary_entry("Mean", "Daily", "group_daily_failure_probability_5d", "failure_probability_5d"),
    ]
    assert daily_group.post_processing == expected_daily

    # default summary for an hourly prediction window
    hourly_group = build_group('5h')
    expected_hourly = [
        summary_entry("Maximum", "Hourly", "hourly_failure_probability_5h", "failure_probability_5h"),
        summary_entry("Mean", "GroupHourly", "group_hourly_failure_probability_5h", "failure_probability_5h"),
    ]
    assert hourly_group.post_processing == expected_hourly


def test_failure_probability_prediction_backtrack(asset_group_id, iot_type):
    """Verify get_prediction_backtrack for default and custom summary granularities.

    Each expected value is a two-element list: the first element holds three
    offsets (snap to the start of the coarsest summary period, step back by that
    period, then step back by the rolling window) and the second element is an
    empty list.  The four summary variants are checked twice: first with the
    rolling window defaulting to the 5d prediction window, then with an explicit
    4h rolling window.
    """
    # This one dict is shared by every pipeline built below and is mutated
    # half-way through to add an explicit rolling window.
    model_pipeline = {
        'features': ['%s:%s' % (iot_type, name) for name in ('axlevibration', 'axlemomentum')],
        'features_for_training': [':faildate'],
        'predictions': ['failure_probability', 'rca_path'],
        'aggregation_methods': ['mean', 'max', 'min', 'std'],
        'prediction_window_size': '5d',
    }

    midnight = {"hour": 0, "minute": 0, "second": 0, "microsecond": 0}
    top_of_hour = {"minute": 0, "second": 0, "microsecond": 0}

    def new_group(*summary_levels):
        # Without levels the pipeline falls back to its default summary;
        # otherwise a 'max' summary of the first prediction is requested per level.
        if not summary_levels:
            return FailurePredictionAssetGroupPipeline(
                asset_group_id=asset_group_id,
                model_pipeline=model_pipeline,
            )
        return FailurePredictionAssetGroupPipeline(
            asset_group_id=asset_group_id,
            model_pipeline=model_pipeline,
            summary={
                '${predictions[0]}': {level: {'max': None} for level in summary_levels},
            },
        )

    def assert_backtrack(group, snap, step, rolling_window):
        # The expected value is built before the pipeline is queried.
        expected = [[DateOffset(**snap), DateOffset(**step), to_offset(rolling_window)], []]
        assert expected == group.get_prediction_backtrack(group.pipeline_config)

    def check_all_summaries(rolling_window):
        # default summary: for a 5d prediction window it is daily, so reset to
        # midnight, go back one day, then go back the rolling window
        assert_backtrack(new_group(), midnight, {"days": 1}, rolling_window)

        # custom hourly summary: only one hour back is needed for the summary;
        # the rolling-window part of the backtrack must not change
        assert_backtrack(new_group('hourly'), top_of_hour, {"hours": 1}, rolling_window)

        # custom weekly summary: go back far enough to cover a week's worth of
        # data (one week when today is weekday 6, otherwise two), plus the
        # rolling window
        weekly_group = new_group('hourly', 'weekly')
        weeks_back = 1 if pd.Timestamp('today').weekday() == 6 else 2
        assert_backtrack(
            weekly_group,
            {"weekday": 6, **midnight},
            {"n": weeks_back, "weeks": 1},
            rolling_window,
        )

        # custom monthly summary: reset to the first day of the month, go back
        # one month, plus the rolling window
        assert_backtrack(
            new_group('hourly', 'weekly', 'monthly'),
            {"day": 1, **midnight},
            {"months": 1},
            rolling_window,
        )

    # rolling window not given: it defaults to the prediction window (5d)
    check_all_summaries('5d')

    # explicit rolling window much smaller than the daily granularity
    model_pipeline['rolling_window_size'] = '4h'
    check_all_summaries('4h')


def test_failure_probability_model_config_aggregation_methods(asset_group_id, iot_type):
    """Check how the 'aggregation_methods' model pipeline setting is handled.

    Covers the default list, explicit lists, per-feature dicts (verified through
    the feature names present after prepare_execute), and the ValueError
    expected whenever an unknown method name is given.
    """

    def build_group(aggregation_methods=None):
        # 'aggregation_methods' is only put into the config when given, and is
        # inserted before 'prediction_window_size'.
        model_pipeline = {
            'features': ['%s:%s' % (iot_type, name) for name in ('axlevibration', 'axlemomentum')],
            'features_for_training': [':faildate'],
            'predictions': ['failure_probability', 'rca_path'],
        }
        if aggregation_methods is not None:
            model_pipeline['aggregation_methods'] = aggregation_methods
        model_pipeline['prediction_window_size'] = '5d'
        return FailurePredictionAssetGroupPipeline(
            asset_group_id=asset_group_id,
            model_pipeline=model_pipeline,
        )

    def prepared_features(aggregation_methods):
        # Run prepare_execute on a freshly fetched pipeline config and report
        # the feature names it ends up with.
        group = build_group(aggregation_methods)
        config = group._get_pipeline_config()
        group.prepare_execute(group._entity_type.get_calc_pipeline(), config)
        return set(config['features'])

    # nothing given: the full default list is used
    group = build_group()
    assert ['mean', 'max', 'min', 'median', 'std', 'sum', 'count'] == group.pipeline_config['aggregation_methods']

    # explicit lists are kept as given (a copy is passed in so the expectation
    # never aliases the object handed to the pipeline)
    for methods in (('mean', 'max', 'min', 'std'), ('count',)):
        group = build_group(list(methods))
        assert list(methods) == group.pipeline_config['aggregation_methods']

    # only one invalid method
    with pytest.raises(ValueError):
        build_group(['abc'])

    # include one invalid method
    with pytest.raises(ValueError):
        build_group(['min', 'max', 'mean', 'abc'])

    # per-feature methods, only one feature aggregated
    expected_single = {'axlevibration', 'axlemomentum', 'axlevibration__count__5d'}
    assert expected_single == prepared_features({'axlevibration': ['count']})

    # per-feature methods, both features aggregated
    expected_both = {
        'axlevibration',
        'axlemomentum',
        'axlevibration__count__5d',
        'axlemomentum__min__5d',
        'axlemomentum__max__5d',
        'axlemomentum__mean__5d',
    }
    assert expected_both == prepared_features(
        {'axlevibration': ['count'], 'axlemomentum': ['min', 'max', 'mean']}
    )

    # per-feature methods including one invalid method
    with pytest.raises(ValueError):
        build_group({'axlevibration': ['abc'], 'axlemomentum': ['min', 'max', 'mean']})


def test_failure_probability_model_config_rolling_window_size(asset_group_id, iot_type):
    """Check how the 'rolling_window_size' model pipeline setting is resolved.

    Covers the default (same as the prediction window), explicit values in
    seconds/minutes/hours/days, a rolling window larger than the prediction
    window, and the ValueError expected for week and month units.
    """

    def build_group(prediction_window, rolling_window=None, aggregation_methods=None):
        # Optional settings are only put into the config when given, keeping the
        # key order: ..., aggregation_methods, prediction_window_size, rolling_window_size.
        model_pipeline = {
            'features': ['%s:%s' % (iot_type, name) for name in ('axlevibration', 'axlemomentum')],
            'features_for_training': [':faildate'],
            'predictions': ['failure_probability', 'rca_path'],
        }
        if aggregation_methods is not None:
            model_pipeline['aggregation_methods'] = aggregation_methods
        model_pipeline['prediction_window_size'] = prediction_window
        if rolling_window is not None:
            model_pipeline['rolling_window_size'] = rolling_window
        return FailurePredictionAssetGroupPipeline(
            asset_group_id=asset_group_id,
            model_pipeline=model_pipeline,
        )

    # default rolling window is taken from the prediction window
    for prediction_window in ('5d', '5h'):
        group = build_group(prediction_window)
        assert prediction_window == group.pipeline_config['rolling_window_size']

    # explicit rolling windows: 5 seconds, 5 minutes (both spellings), 6 hours, 10 days
    explicit_cases = (
        ('6h', '5s'),
        ('3d', '5t'),
        ('3d', '5min'),
        ('3d', '6h'),
        ('30d', '10d'),
    )
    for prediction_window, rolling_window in explicit_cases:
        group = build_group(prediction_window, rolling_window)
        assert rolling_window == group.pipeline_config['rolling_window_size']

    # explicit rolling window larger than prediction window
    group = build_group('5d', '10d')
    assert '5d' == group.pipeline_config['prediction_window_size']
    assert '10d' == group.pipeline_config['rolling_window_size']

    # The invalid-unit cases must use only valid aggregation methods: an unknown
    # method such as 'abc' raises ValueError by itself, which would let these
    # cases pass even if the week/month units were accepted.
    valid_methods = ('min', 'max', 'mean')

    # invalid unit, week (given as the prediction window, which the rolling
    # window defaults to)
    with pytest.raises(ValueError):
        build_group('5w', aggregation_methods=list(valid_methods))

    # invalid rolling window unit, month
    with pytest.raises(ValueError):
        build_group('60d', '3m', aggregation_methods=list(valid_methods))


def test_failure_probability_model_config_prediction_window(asset_group_id, iot_type):
    """Check prediction window handling and prediction output name postfixing.

    For each valid configuration the resolved aggregation methods, prediction
    window, rolling window and prediction names are compared with the expected
    values; invalid prediction/rolling windows are expected to raise ValueError.
    """
    four_methods = ('mean', 'max', 'min', 'std')
    default_methods = ('mean', 'max', 'min', 'median', 'std', 'sum', 'count')
    plain = ('failure_probability', 'rca_path')
    with_1d = ('failure_probability_1d', 'rca_path_1d')
    with_10d = ('failure_probability_10d', 'rca_path_10d')

    def build_group(predictions, prediction_window, aggregation_methods=None, rolling_window=None):
        # Fresh lists are handed to every pipeline; optional settings are only
        # put into the config when given, keeping the key order:
        # ..., predictions, aggregation_methods, prediction_window_size, rolling_window_size.
        model_pipeline = {
            'features': ['%s:%s' % (iot_type, name) for name in ('axlevibration', 'axlemomentum')],
            'features_for_training': [':faildate'],
            'predictions': list(predictions),
        }
        if aggregation_methods is not None:
            model_pipeline['aggregation_methods'] = list(aggregation_methods)
        model_pipeline['prediction_window_size'] = prediction_window
        if rolling_window is not None:
            model_pipeline['rolling_window_size'] = rolling_window
        return FailurePredictionAssetGroupPipeline(
            asset_group_id=asset_group_id,
            model_pipeline=model_pipeline,
        )

    # Columns: predictions, aggregation methods (None = not given), prediction
    # window, rolling window (None = not given), expected rolling window,
    # expected prediction names.
    cases = (
        (plain, four_methods, '5d', None, '5d', ['failure_probability_5d', 'rca_path_5d']),
        # special single 'd'
        (plain, four_methods, 'd', None, 'd', list(with_1d)),
        # special single 'D'
        (with_1d, four_methods, 'D', None, 'D', list(with_1d)),
        # special '1d'
        (plain, four_methods, '1d', None, '1d', list(with_1d)),
        # special '1D'
        (plain, four_methods, '1D', None, '1D', list(with_1d)),
        # predictions already carrying the postfix cause no conflict
        (with_1d, four_methods, '1d', None, '1d', list(with_1d)),
        # explicitly given prediction names are preserved (case sensitive)
        # without appending an extra postfix
        (('failure_Probability_1D', 'rca_path_1d'), four_methods, '1d', None, '1d',
         ['failure_Probability_1D', 'rca_path_1d']),
        # uniform lower-case postfix
        (plain, four_methods, '1D', None, '1D', list(with_1d)),
        # default aggregation methods
        (plain, None, '10d', None, '10d', list(with_10d)),
        # different rolling window size, and no re-padding of the name postfix
        (with_10d, None, '10d', '5d', '5d', list(with_10d)),
        # prediction window size gets padded onto the prediction output names
        (with_10d, None, '12h', None, '12h', ['failure_probability_10d_12h', 'rca_path_10d_12h']),
    )
    for predictions, methods, window, rolling, expected_rolling, expected_predictions in cases:
        group = build_group(predictions, window, methods, rolling)
        expected_methods = list(default_methods if methods is None else methods)
        assert expected_methods == group.pipeline_config['aggregation_methods']
        assert window == group.pipeline_config['prediction_window_size']
        assert expected_rolling == group.pipeline_config['rolling_window_size']
        assert expected_predictions == group.pipeline_config['predictions']

    # invalid prediction window given, no unit
    with pytest.raises(ValueError):
        build_group(with_10d, '10')

    # invalid prediction window
    with pytest.raises(ValueError):
        build_group(('failure_probability_10m', 'rca_path_10m'), '10ms')

    # invalid rolling window given
    with pytest.raises(ValueError):
        build_group(with_10d, '10d', rolling_window='abc')

    # the prediction window is padded onto the end of a prediction name only
    # when the name does not already end with it
    group = build_group(('failure_probability', 'rca_path_5'), '5d', four_methods)
    assert ['failure_probability_5d', 'rca_path_5_5d'] == group.pipeline_config['predictions']

    group = build_group(('failure_probability', 'rca_path_5h'), '5h', four_methods)
    assert ['failure_probability_5h', 'rca_path_5h'] == group.pipeline_config['predictions']


def test_data_substitution_validation(asset_group_id, iot_type):
    """Validate the entity types accepted in ``data_substitution`` by the pipeline constructor.

    The constructor is exercised without ``model_timestamp`` (labelled "training"
    below) and with ``model_timestamp`` (labelled "scoring" below). A
    ``ValueError`` is expected for an unknown entity type in either case, and
    for asset data (key ``''``) passed together with ``model_timestamp``.
    """
    data_dir = current_directory(file=__file__)

    # asset failure dates, re-keyed to the asset ids used by this test
    df_data_asset = pd.read_csv('%s/trainbrake_asset_faildates.csv' % data_dir, parse_dates=['faildate'])
    is_first_asset = df_data_asset['asset'] == 'TRAINBRAKE1'
    df_data_asset['asset'] = np.where(is_first_asset, 'abcd-1', 'abcd-2')
    df_data_asset['assetid'] = df_data_asset['asset'] + '-____-' + df_data_asset['site']
    df_data_asset['datetime'] = df_data_asset['faildate']

    # first 10000 sensor rows, tagged with the device type under test
    df_data_sensor = pd.read_csv('%s/trainbrake_device_data.csv' % data_dir, parse_dates=['RCV_TIMESTAMP_UTC'])[:10000]
    df_data_sensor['DEVICETYPE'] = iot_type
    is_first_device = df_data_sensor['DEVICEID'] == 'TrainBrake_1'
    df_data_sensor['DEVICEID'] = np.where(is_first_device, 'abcd-1', 'abcd-2')

    raw_to_feature = {
        'TRAINBRAKESIMULATION_AXLEVIBRATION': 'axlevibration',
        'TRAINBRAKESIMULATION_AXLEMOMENTUM': 'axlemomentum',
    }
    substitution_asset = [
        {
            'df': df_data_asset,
            'keys': ['assetid'],
            'columns': ['faildate'],
            'timestamp': 'datetime',
        },
    ]
    substitution_iot = [
        {
            'df': df_data_sensor,
            'keys': ['DEVICEID'],
            'columns': list(raw_to_feature),
            'timestamp': 'RCV_TIMESTAMP_UTC',
            'rename_columns': raw_to_feature,
        },
    ]

    # keyword arguments shared by every constructor call below
    common = dict(
        asset_group_id=asset_group_id,
        model_pipeline={
            'features': ['%s:%s' % (iot_type, name) for name in ('axlevibration', 'axlemomentum')],
            'features_for_training': [':faildate'],
            'predictions': ['failure_probability_1d', 'rca_path_1d'],
            'aggregation_methods': ['mean', 'max', 'min', 'median', 'std', 'sum', 'count'],
            'prediction_window_size': '1d',
        },
        asset_device_mappings={
            'abcd-1-____-BEDFORD': ['%s:abcd-1' % iot_type],
            'abcd-2-____-BEDFORD': [],
        },
    )
    trained_models = {
        'FailurePredictionRcaEstimator': '1570169739',
        'FailurePredictionEstimator': '1570169523',
    }
    unknown_type = iot_type + '_abc'

    # --- training: asset and sensor substitutions are both accepted
    FailurePredictionAssetGroupPipeline(
        **common,
        data_substitution={'': substitution_asset, iot_type: substitution_iot},
    )

    # --- training: an unknown entity type is rejected
    with pytest.raises(ValueError):
        FailurePredictionAssetGroupPipeline(
            **common,
            data_substitution={'': substitution_asset, unknown_type: substitution_iot},
        )

    # --- scoring: sensor substitution alone is accepted
    FailurePredictionAssetGroupPipeline(
        **common,
        model_timestamp=dict(trained_models),
        data_substitution={iot_type: substitution_iot},
    )

    # --- scoring: an unknown entity type is rejected
    with pytest.raises(ValueError):
        FailurePredictionAssetGroupPipeline(
            **common,
            model_timestamp=dict(trained_models),
            data_substitution={unknown_type: substitution_iot},
        )

    # --- scoring: asset data is rejected as unused (scoring does not need it)
    with pytest.raises(ValueError):
        FailurePredictionAssetGroupPipeline(
            **common,
            model_timestamp=dict(trained_models),
            data_substitution={'': substitution_asset, iot_type: substitution_iot},
        )


def test_failure_probability_data_substitution(asset_group_id, iot_type):
    """Run ``execute()`` and ``predict()`` on a pipeline fed through ``data_substitution``.

    Asset fail dates and sensor readings are loaded from the CSV files next to
    this module and handed to the pipeline as data frames. The test checks the
    training flags, the recorded model timestamps, and the shape and column
    names of both the ``execute()`` result and a follow-up ``predict()`` call.
    """
    data_dir = current_directory(file=__file__)
    prediction_columns = ('failure_probability_5d', 'rca_path_5d')

    df_data_asset = pd.read_csv('%s/trainbrake_asset_faildates.csv' % data_dir, parse_dates=['faildate'])
    is_first_asset = df_data_asset['asset'] == 'TRAINBRAKE1'
    df_data_asset['asset'] = np.where(is_first_asset, 'abcd-1', 'abcd-2')
    df_data_asset['assetid'] = df_data_asset['asset'] + '-____-' + df_data_asset['site']
    df_data_asset['datetime'] = df_data_asset['faildate']

    df_data_sensor = pd.read_csv('%s/trainbrake_device_data.csv' % data_dir, parse_dates=['RCV_TIMESTAMP_UTC'])[:10000]
    df_data_sensor['DEVICETYPE'] = iot_type
    is_first_device = df_data_sensor['DEVICEID'] == 'TrainBrake_1'
    df_data_sensor['DEVICEID'] = np.where(is_first_device, 'abcd-1', 'abcd-2')

    model_pipeline = {
        'features': ['%s:axlevibration' % iot_type, '%s:axlemomentum' % iot_type],
        'features_for_training': [':faildate'],
        'predictions': ['failure_probability', 'rca_path'],
        'aggregation_methods': ['mean', 'max', 'min', 'median', 'std', 'sum', 'count'],
        'prediction_window_size': '5d',
    }
    asset_device_mappings = {
        'abcd-1-____-BEDFORD': ['%s:abcd-1' % iot_type],
        'abcd-2-____-BEDFORD': [],
    }
    asset_substitution = {
        'df': df_data_asset,
        'keys': ['assetid'],
        'columns': ['faildate'],
        'timestamp': 'datetime',
    }
    sensor_substitution = {
        'df': df_data_sensor,
        'keys': ['DEVICEID'],
        'columns': [
            'TRAINBRAKESIMULATION_AXLEVIBRATION',
            'TRAINBRAKESIMULATION_AXLEMOMENTUM',
        ],
        'timestamp': 'RCV_TIMESTAMP_UTC',
        'rename_columns': {
            'TRAINBRAKESIMULATION_AXLEVIBRATION': 'axlevibration',
            'TRAINBRAKESIMULATION_AXLEMOMENTUM': 'axlemomentum',
        },
    }

    group = FailurePredictionAssetGroupPipeline(
        asset_group_id=asset_group_id,
        model_pipeline=model_pipeline,
        asset_device_mappings=asset_device_mappings,
        data_substitution={
            '': [asset_substitution],
            iot_type: [sensor_substitution],
        },
    )
    df = group.execute()

    print(log_df_info(df, head=0))

    assert group.new_training
    assert False == group.training
    for estimator_name in ('FailurePredictionEstimator', 'FailurePredictionRcaEstimator'):
        assert estimator_name in group.model_timestamp
    # assert (2764, 2) == df.shape
    assert df.shape == (545, 2)
    for column in prediction_columns:
        assert column in df.columns

    # score right away still using the data substitution
    df_scored = group.predict(start_ts='2019-07-11 03:25:00', end_ts='2019-07-17 00:00:00')

    print(log_df_info(df_scored, head=0))

    # assert (2346, 2) == df_scored.shape
    assert df_scored.shape == (132, 2)
    for column in prediction_columns:
        assert column in df_scored.columns


def test_failure_probability(asset_group_id, iot_type):
    """End-to-end failure prediction test backed by the database from ``api._get_db()``.

    Steps:
      1. Register asset group members, asset/device mappings, the asset cache
         and a temporary IoT type built from the CSV fixtures.
      2. Run the pipeline with a ``5d`` prediction window and check the result.
      3. Write the result with ``group._write`` and check the row counts of the
         ``dm_*`` tables.
      4. Score a time range with data and a time range without data.
      5. Run a second pipeline with an ``8h`` prediction window.

    Everything registered in step 1 is deleted again in ``finally``.
    """
    df_data_asset = pd.read_csv('%s/trainbrake_asset_faildates.csv' % current_directory(file=__file__), parse_dates=['faildate'])
    df_data_asset['asset'] = np.where(df_data_asset['asset'] == 'TRAINBRAKE1', 'abcd-1', 'abcd-2')
    df_asset_group = df_data_asset.groupby(['site', 'asset']).size().reset_index()[['site', 'asset']]

    df_data_sensor = pd.read_csv('%s/trainbrake_device_data.csv' % current_directory(file=__file__), parse_dates=['RCV_TIMESTAMP_UTC'])[:10000]
    df_data_sensor['DEVICEID'] = np.where(df_data_sensor['DEVICEID'] == 'TrainBrake_1', 'abcd-1', 'abcd-2')

    df_mappings = pd.DataFrame(
        columns=['site', 'asset', 'devicetype', 'deviceid'],
        data=[
            ['BEDFORD', 'abcd-1', iot_type, 'abcd-1'],
        ],
    )

    # Acquire the connection *before* the try block: the cleanup in ``finally``
    # needs ``db``, so a connection failure must surface as itself instead of
    # being masked by an UnboundLocalError raised from the cleanup code.
    db = api._get_db()
    db_schema = None

    try:
        api.set_asset_group_members(asset_group_id=asset_group_id, df=df_asset_group, db=db, db_schema=db_schema)

        api.set_asset_device_mappings(df=df_mappings, db=db, db_schema=db_schema)

        api.set_asset_cache(df=df_data_asset, siteid_column='site', assetid_column='asset', faildate_column='faildate', db=db, db_schema=db_schema)

        api.setup_iot_type(
            iot_type,
            df_data_sensor,
            columns=['TRAINBRAKESIMULATION_AXLEVIBRATION', 'TRAINBRAKESIMULATION_AXLEMOMENTUM'],
            deviceid_column='DEVICEID',
            timestamp_column='RCV_TIMESTAMP_UTC',
            timestamp_in_payload=False,
            parse_dates=None,
            rename_columns={
                'TRAINBRAKESIMULATION_AXLEVIBRATION': 'Axlevibration',
                'TRAINBRAKESIMULATION_AXLEMOMENTUM': 'Axlemomentum',
            },
            write='deletefirst',
            use_wiotp=False,
            import_only=False,
            db=db,
            db_schema=db_schema,
        )

        group = FailurePredictionAssetGroupPipeline(
                    asset_group_id=asset_group_id,
                    model_pipeline={
                        'features': ['%s:Axlevibration' % iot_type, '%s:Axlemomentum' % iot_type],
                        'features_for_training': [':faildate'],
                        'predictions': ['failure_probability', 'rca_path'],
                        'aggregation_methods': ['mean', 'max', 'min', 'median', 'std', 'sum', 'count'],
                        'prediction_window_size': '5d',
                    },
                )
        df = group.execute()

        print(log_df_info(df, head=0))

        assert group.new_training
        assert False == group.training
        assert 'FailurePredictionEstimator' in group.model_timestamp and 'FailurePredictionRcaEstimator' in group.model_timestamp
        assert (545, 2) == df.shape
        assert 'failure_probability_5d' in df.columns
        assert 'rca_path_5d' in df.columns

        # expected number of rows per output table after writing ``df`` once
        expected_row_counts = [
            ('dm_%s' % asset_group_id, 1090),
            ('dm_%s_daily' % asset_group_id, 3),
            ('dm_%s_Daily' % asset_group_id, 3),
        ]

        # empty the output tables first so the counts below reflect this write only
        for table_name, _ in expected_row_counts:
            try:
                table = db.get_table(table_name, db_schema)
            except Exception:
                # best effort: the lookup failed (e.g. table not created yet),
                # so there is nothing to clear
                continue
            db.connection.execute(table.delete())

        # test writing directly
        group._write(df)

        for table_name, expected_rows in expected_row_counts:
            table = db.get_table(table_name, db_schema)
            actual_rows = db.connection.execute(select([func.count()]).select_from(table)).first()[0]
            assert expected_rows == actual_rows, table_name

        df_scored = group.predict(start_ts='2019-07-11 03:25:00', end_ts='2019-07-17 00:00:00')

        print(log_df_info(df_scored, head=0))

        assert (132, 2) == df_scored.shape
        assert 'failure_probability_5d' in df_scored.columns
        assert 'rca_path_5d' in df_scored.columns

        # test empty input data for scoring, which should return empty df with prediction columns added
        df_scored = group.predict(start_ts='1976-01-01 03:25:00', end_ts='1976-01-02 00:00:00')

        print(log_df_info(df_scored, head=0))

        assert (0, 2) == df_scored.shape
        assert 'failure_probability_5d' in df_scored.columns
        assert 'rca_path_5d' in df_scored.columns

        # test hourly summary
        group = FailurePredictionAssetGroupPipeline(
                    asset_group_id=asset_group_id,
                    model_pipeline={
                        'features': ['%s:Axlevibration' % iot_type, '%s:Axlemomentum' % iot_type],
                        'features_for_training': [':faildate'],
                        'predictions': ['failure_probability', 'rca_path'],
                        'aggregation_methods': ['mean', 'max', 'min', 'std'],
                        'prediction_window_size': '8h',
                    },
                )
        df = group.execute()

        print(log_df_info(df, head=0))

        assert group.new_training
        assert False == group.training
        assert 'FailurePredictionEstimator' in group.model_timestamp and 'FailurePredictionRcaEstimator' in group.model_timestamp
        assert (4010, 2) == df.shape
        assert 'failure_probability_8h' in df.columns
        assert 'rca_path_8h' in df.columns
    finally:
        # always remove what this test registered, whether or not it passed
        api.delete_iot_type(iot_type, use_wiotp=False, db=db, db_schema=db_schema)

        api.delete_asset_cache(df=df_data_asset, db=db, db_schema=db_schema)

        api.delete_asset_device_mappings(df=df_mappings, db=db, db_schema=db_schema)

        api.delete_asset_group_members(asset_group_id=asset_group_id, db=db, db_schema=db_schema)


def test_failure_probability_override(asset_group_id, iot_type):
    """End-to-end failure prediction test using ``override_training_stages``.

    Builds a five-stage list (feature transformation, scaling, feature
    preprocessing, model-based feature generation, estimator) and passes it to
    the first pipeline as ``override_training_stages`` together with
    ``'scoring': 'r2'``. The rest of the test registers the fixtures in the
    database from ``api._get_db()``, runs the pipeline with a ``5d`` window,
    writes and counts the output rows, scores two time ranges (one without
    data), and finally runs a second pipeline with an ``8h`` window and no
    overrides.

    Everything registered in the database is deleted again in ``finally``.
    """
    feature_transformation_set = [
        ('skiptransformation', NoOp()),
        ('normalizer', Normalizer()),
    ]

    scaler_set = [
        ('skipscaling', NoOp()),
        ('standardscaler', StandardScaler()),
        ('minmaxscaler', MinMaxScaler()),
    ]

    feature_preprocessing_set = [
        ('skipfeaturepreprocessing', NoOp()),
        ('pca', PCA()),
        ('fastica', FastICA()),
        ('kernelpca', KernelPCA()),
        ('selectkbest', SelectKBest()),
        ('variancethreshold', VarianceThreshold()),
    ]

    estimator_feature_generator = [
        ('skipmodelfeaturegeneration', NoOp()),
        ('multinomialnbfeature', ModelbasedFeatureGenerator(MultinomialNB())),
        ('decisiontreeclassifierfeature', ModelbasedFeatureGenerator(DecisionTreeClassifier())),
        ('extratreesclassifierfeature', ModelbasedFeatureGenerator(ExtraTreesClassifier())),
        ('randomforestclassifierfeature', ModelbasedFeatureGenerator(RandomForestClassifier())),
    ]

    estimator_set = [
        ('randomforestclassifier', RandomForestClassifier()),
    ]

    # one list of (name, step) options per stage, in execution order
    my_stage = [
        feature_transformation_set,
        scaler_set,
        feature_preprocessing_set,
        estimator_feature_generator,
        estimator_set,
    ]

    print(my_stage)

    df_data_asset = pd.read_csv('%s/trainbrake_asset_faildates.csv' % current_directory(file=__file__), parse_dates=['faildate'])
    df_data_asset['asset'] = np.where(df_data_asset['asset'] == 'TRAINBRAKE1', 'abcd-1', 'abcd-2')
    df_asset_group = df_data_asset.groupby(['site', 'asset']).size().reset_index()[['site', 'asset']]

    df_data_sensor = pd.read_csv('%s/trainbrake_device_data.csv' % current_directory(file=__file__), parse_dates=['RCV_TIMESTAMP_UTC'])[:10000]
    df_data_sensor['DEVICEID'] = np.where(df_data_sensor['DEVICEID'] == 'TrainBrake_1', 'abcd-1', 'abcd-2')

    df_mappings = pd.DataFrame(
        columns=['site', 'asset', 'devicetype', 'deviceid'],
        data=[
            ['BEDFORD', 'abcd-1', iot_type, 'abcd-1'],
        ],
    )

    # Acquire the connection *before* the try block: the cleanup in ``finally``
    # needs ``db``, so a connection failure must surface as itself instead of
    # being masked by an UnboundLocalError raised from the cleanup code.
    db = api._get_db()
    db_schema = None

    try:
        api.set_asset_group_members(asset_group_id=asset_group_id, df=df_asset_group, db=db, db_schema=db_schema)

        api.set_asset_device_mappings(df=df_mappings, db=db, db_schema=db_schema)

        api.set_asset_cache(df=df_data_asset, siteid_column='site', assetid_column='asset', faildate_column='faildate', db=db, db_schema=db_schema)

        api.setup_iot_type(
            iot_type,
            df_data_sensor,
            columns=['TRAINBRAKESIMULATION_AXLEVIBRATION', 'TRAINBRAKESIMULATION_AXLEMOMENTUM'],
            deviceid_column='DEVICEID',
            timestamp_column='RCV_TIMESTAMP_UTC',
            timestamp_in_payload=False,
            parse_dates=None,
            rename_columns={
                'TRAINBRAKESIMULATION_AXLEVIBRATION': 'Axlevibration',
                'TRAINBRAKESIMULATION_AXLEMOMENTUM': 'Axlemomentum',
            },
            write='deletefirst',
            use_wiotp=False,
            import_only=False,
            db=db,
            db_schema=db_schema,
        )

        group = FailurePredictionAssetGroupPipeline(
                    asset_group_id=asset_group_id,
                    model_pipeline={
                        'features': ['%s:Axlevibration' % iot_type, '%s:Axlemomentum' % iot_type],
                        'features_for_training': [':faildate'],
                        'predictions': ['failure_probability', 'rca_path'],
                        'aggregation_methods': ['mean', 'max', 'min', 'median', 'std', 'sum', 'count'],
                        'prediction_window_size': '5d',
                        'scoring': 'r2',
                        'override_training_stages': my_stage,
                    },
                )
        df = group.execute()

        print(log_df_info(df, head=0))

        assert group.new_training
        assert False == group.training
        assert 'FailurePredictionEstimator' in group.model_timestamp and 'FailurePredictionRcaEstimator' in group.model_timestamp
        assert (545, 2) == df.shape
        assert 'failure_probability_5d' in df.columns
        assert 'rca_path_5d' in df.columns

        # expected number of rows per output table after writing ``df`` once
        expected_row_counts = [
            ('dm_%s' % asset_group_id, 1090),
            ('dm_%s_daily' % asset_group_id, 3),
            ('dm_%s_Daily' % asset_group_id, 3),
        ]

        # empty the output tables first so the counts below reflect this write only
        for table_name, _ in expected_row_counts:
            try:
                table = db.get_table(table_name, db_schema)
            except Exception:
                # best effort: the lookup failed (e.g. table not created yet),
                # so there is nothing to clear
                continue
            db.connection.execute(table.delete())

        # test writing directly
        group._write(df)

        for table_name, expected_rows in expected_row_counts:
            table = db.get_table(table_name, db_schema)
            actual_rows = db.connection.execute(select([func.count()]).select_from(table)).first()[0]
            assert expected_rows == actual_rows, table_name

        df_scored = group.predict(start_ts='2019-07-11 03:25:00', end_ts='2019-07-17 00:00:00')

        print(log_df_info(df_scored, head=0))

        assert (132, 2) == df_scored.shape
        assert 'failure_probability_5d' in df_scored.columns
        assert 'rca_path_5d' in df_scored.columns

        # test empty input data for scoring, which should return empty df with prediction columns added
        df_scored = group.predict(start_ts='1976-01-01 03:25:00', end_ts='1976-01-02 00:00:00')

        print(log_df_info(df_scored, head=0))

        assert (0, 2) == df_scored.shape
        assert 'failure_probability_5d' in df_scored.columns
        assert 'rca_path_5d' in df_scored.columns

        # test hourly summary
        group = FailurePredictionAssetGroupPipeline(
                    asset_group_id=asset_group_id,
                    model_pipeline={
                        'features': ['%s:Axlevibration' % iot_type, '%s:Axlemomentum' % iot_type],
                        'features_for_training': [':faildate'],
                        'predictions': ['failure_probability', 'rca_path'],
                        'aggregation_methods': ['mean', 'max', 'min', 'std'],
                        'prediction_window_size': '8h',
                    },
                )
        df = group.execute()

        print(log_df_info(df, head=0))

        assert group.new_training
        assert False == group.training
        assert 'FailurePredictionEstimator' in group.model_timestamp and 'FailurePredictionRcaEstimator' in group.model_timestamp
        assert (4010, 2) == df.shape
        assert 'failure_probability_8h' in df.columns
        assert 'rca_path_8h' in df.columns
    finally:
        # always remove what this test registered, whether or not it passed
        api.delete_iot_type(iot_type, use_wiotp=False, db=db, db_schema=db_schema)

        api.delete_asset_cache(df=df_data_asset, db=db, db_schema=db_schema)

        api.delete_asset_device_mappings(df=df_mappings, db=db, db_schema=db_schema)

        api.delete_asset_group_members(asset_group_id=asset_group_id, db=db, db_schema=db_schema)



