# Licensed Materials - Property of IBM
# 5737-M66, 5900-AAA
# (C) Copyright IBM Corp. 2019, 2025 All Rights Reserved.
# US Government Users Restricted Rights - Use, duplication, or disclosure
# restricted by GSA ADP Schedule Contract with IBM Corp.

import numpy as np
import pandas as pd
import pytest
from sqlalchemy import func, select

from pandas.tseries.frequencies import to_offset
from pandas.tseries.offsets import DateOffset

from .. import api
from ..time_to_failure import TimeToFailureAssetGroupPipeline
from ..failure_prediction import FailurePredictionAssetGroupPipeline
from ..util import current_directory, log_df_info


def test_prepare_execution_adding_training_features(asset_group_id='abcd', iot_type=None):
    """Check the training features left in the config by prepare_execute().

    Builds a TimeToFailureAssetGroupPipeline with ``smart_regression`` enabled
    and no explicit ``features_for_training``, calls ``prepare_execute()``
    once, and asserts that ``pipeline_config['features_for_training']`` then
    holds exactly one entry.

    Args:
        asset_group_id: asset group ID passed to the pipeline constructor.
        iot_type: device type used as the prefix of every feature name; when
            omitted, a random ``abcdsensor_NNNNN`` name is generated.
    """
    if iot_type is None:
        import math
        import random
        iot_type = 'abcdsensor_%05d' % math.floor(random.random() * 10**5)

    sensor_columns = [
        'sa_humidity_episode',
        'sa_temp_episode',
        'ra_humidity_episode',
        'ra_temp_episode',
        'return_co2_episode',
        'chw_valve_feedback_episode',
        'lthw_valve_feedback_episode',
        'off_coil_temp_episode',
    ]

    model_pipeline = {
        'features': ['%s:%s' % (iot_type, name) for name in sensor_columns],
        # 'features_for_training': [':faildate', ':problemcode'],
        'predictions': ['predicted_time_to_failure'],
        'failure_mode': 'PUMPS/STOPPED',
        'aggregation_methods': ['min'],
        'aggregate_window_size': 3,
        # 'aggregate_type_for_prediction_interval': 'mean',
        'smart_regression': True,
    }
    group = TimeToFailureAssetGroupPipeline(
        asset_group_id=asset_group_id,
        model_pipeline=model_pipeline,
    )

    # Snapshot of the training features before prepare_execute(). It is not
    # asserted on, but the lookup is kept so the test still stops here when
    # the key is missing from the config.
    features_before = group.pipeline_config['features_for_training'].copy()

    # After prepare_execute() exactly one training feature is expected
    # (days_to_fail, according to the note that accompanied this check).
    calc_pipeline = group._entity_type.get_calc_pipeline()
    group.prepare_execute(calc_pipeline, group.pipeline_config)
    assert len(group.pipeline_config['features_for_training']) == 1

    # Disabled: repeated prepare_execute() calls should not keep adding
    # extra training features.
    # group.prepare_execute(group._entity_type.get_calc_pipeline(), group.pipeline_config)
    # assert 1 == len(group.pipeline_config['features_for_training'])

    # df = group.execute()


def test_time_to_failure_prediction_backtrack(asset_group_id='abcd', iot_type=None):
    """Check get_prediction_backtrack() for each summary granularity.

    A pipeline is built with the default summary and then with a custom
    hourly, weekly and monthly ``max`` summary. For each one the value
    returned by ``get_prediction_backtrack(pipeline_config)`` is compared
    with the expected pair of ``DateOffset`` objects followed by an empty
    list.

    Args:
        asset_group_id: asset group ID passed to the pipeline constructor.
        iot_type: device type used as the prefix of every feature name; when
            omitted, a random ``abcdsensor_NNNNN`` name is generated.
    """
    if iot_type is None:
        import math
        import random
        iot_type = 'abcdsensor_%05d' % math.floor(random.random() * 10**5)

    sensor_columns = (
        'sa_humidity_episode',
        'sa_temp_episode',
        'ra_humidity_episode',
        'ra_temp_episode',
        'return_co2_episode',
        'chw_valve_feedback_episode',
        'lthw_valve_feedback_episode',
        'off_coil_temp_episode',
    )

    def _new_group(**extra_kwargs):
        # A fresh model_pipeline dict is created on every call so that no
        # two pipeline instances share one configuration object.
        return TimeToFailureAssetGroupPipeline(
            asset_group_id=asset_group_id,
            model_pipeline={
                'features': ['%s:%s' % (iot_type, name) for name in sensor_columns],
                'features_for_training': [':installdate', ':faildate'],
                'predictions': ['predicted_time_to_failure'],
            },
            **extra_kwargs
        )

    def _max_summary(granularity):
        # Summary config requesting only 'max' of the first prediction at
        # the given granularity.
        return {'${predictions[0]}': {granularity: {'max': None}}}

    # default summary
    group = _new_group()
    expected = [
        [DateOffset(hour=0, minute=0, second=0, microsecond=0), DateOffset(days=1)],
        [],
    ]
    assert expected == group.get_prediction_backtrack(group.pipeline_config)

    # custom hourly summary
    group = _new_group(summary=_max_summary('hourly'))
    expected = [
        [DateOffset(minute=0, second=0, microsecond=0), DateOffset(hours=1)],
        [],
    ]
    assert expected == group.get_prediction_backtrack(group.pipeline_config)

    # custom weekly summary: the expected multiplier is 1 when today is
    # weekday 6 and 2 on any other day
    group = _new_group(summary=_max_summary('weekly'))
    if pd.Timestamp('today').weekday() == 6:
        week_count = 1
    else:
        week_count = 2
    expected = [
        [
            DateOffset(weekday=6, hour=0, minute=0, second=0, microsecond=0),
            DateOffset(n=week_count, weeks=1),
        ],
        [],
    ]
    assert expected == group.get_prediction_backtrack(group.pipeline_config)

    # custom monthly summary
    group = _new_group(summary=_max_summary('monthly'))
    expected = [
        [DateOffset(day=1, hour=0, minute=0, second=0, microsecond=0), DateOffset(months=1)],
        [],
    ]
    assert expected == group.get_prediction_backtrack(group.pipeline_config)


def test_time_to_failure_summary_config(asset_group_id='abcd', iot_type=None):
    """Check the post-processing config produced when no summary is given.

    Constructs the pipeline without a ``summary`` argument and asserts that
    ``group.post_processing`` is a single enabled ``Maximum`` function at
    ``Daily`` granularity, reading ``predicted_time_to_failure`` and writing
    ``daily_predicted_time_to_failure``.

    Args:
        asset_group_id: asset group ID passed to the pipeline constructor.
        iot_type: device type used as the prefix of every feature name; when
            omitted, a random ``abcdsensor_NNNNN`` name is generated.
    """
    if iot_type is None:
        import math
        import random
        iot_type = 'abcdsensor_%05d' % math.floor(random.random() * 10**5)

    sensor_columns = [
        'sa_humidity_episode',
        'sa_temp_episode',
        'ra_humidity_episode',
        'ra_temp_episode',
        'return_co2_episode',
        'chw_valve_feedback_episode',
        'lthw_valve_feedback_episode',
        'off_coil_temp_episode',
    ]
    feature_names = ['%s:%s' % (iot_type, name) for name in sensor_columns]

    # default summary
    group = TimeToFailureAssetGroupPipeline(
        asset_group_id=asset_group_id,
        model_pipeline={
            'features': feature_names,
            'features_for_training': [':installdate', ':faildate'],
            'predictions': ['predicted_time_to_failure'],
        },
    )

    daily_maximum = {
        "functionName": "Maximum",
        "enabled": True,
        "granularity": "Daily",
        "output": {"name": "daily_predicted_time_to_failure"},
        "input": {"source": "predicted_time_to_failure"},
    }
    assert group.post_processing == [daily_maximum]


def test_time_to_failure_data_substitution(asset_group_id='abcd', iot_type=None):
    """Run the time-to-failure pipeline end to end on substituted CSV data.

    Loads ``ahu_time_to_failure.csv`` from the directory of this file, splits
    it into an asset time-series frame, an asset dimension frame and a sensor
    frame, passes them to the pipeline through ``data_substitution`` and
    executes it. The test then checks the training flags, the model
    timestamp entry and the shape and column of the returned frame.

    Args:
        asset_group_id: asset group ID passed to the pipeline constructor.
        iot_type: device type used as the feature prefix and as the sensor
            key in ``data_substitution``; when omitted, a random
            ``abcdsensor_NNNNN`` name is generated.
    """
    if iot_type is None:
        import math
        import random
        iot_type = 'abcdsensor_%05d' % math.floor(random.random() * 10**5)

    sensor_columns = [
        'sa_humidity_episode',
        'sa_temp_episode',
        'ra_humidity_episode',
        'ra_temp_episode',
        'return_co2_episode',
        'chw_valve_feedback_episode',
        'lthw_valve_feedback_episode',
        'off_coil_temp_episode',
    ]

    csv_path = '%s/ahu_time_to_failure.csv' % current_directory(file=__file__)
    df_data = pd.read_csv(csv_path, parse_dates=['faildate'])
    # The device ID is the asset ID with its '-____-BEDFORD' part removed.
    df_data['deviceid'] = df_data['id'].str.replace('-____-BEDFORD', '')
    df_data = df_data.drop(columns='id')
    print('df_data=%s' % log_df_info(df_data, head=5))

    # To keep things simple the same ID is used for each asset/sensor pair,
    # and the single frame is split into three for data substitution.

    # Asset time series: rows that have a failure date, timestamped with it.
    asset_ts_columns = df_data[['deviceid', 'faildate']]
    df_asset_ts = asset_ts_columns.dropna(subset=['faildate'])
    df_asset_ts['event_timestamp'] = df_data['faildate']
    print('df_asset_ts=%s' % log_df_info(df_asset_ts, head=5))

    # Asset dimension: first install date found for each device.
    install_by_device = df_data[['deviceid', 'installdate']].groupby('deviceid')
    df_asset_dim = install_by_device.first().reset_index()
    print('df_asset_dim=%s' % log_df_info(df_asset_dim, head=5))

    # Sensor data: everything except the two asset date columns.
    df_sensor = df_data.drop(columns=['installdate', 'faildate'])
    print('df_sensor=%s' % log_df_info(df_sensor, head=5))

    model_pipeline = {
        'features': ['%s:%s' % (iot_type, name) for name in sensor_columns],
        'features_for_training': [':installdate', ':faildate'],
        'predictions': ['predicted_time_to_failure'],
    }
    asset_device_mappings = {
        deviceid: ['%s:%s' % (iot_type, deviceid)]
        for deviceid in df_asset_dim['deviceid']
    }
    asset_substitutions = [
        {
            'df': df_asset_ts,
            'keys': ['deviceid'],
            'columns': ['faildate'],
            'timestamp': 'event_timestamp'
        },
        {
            'df': df_asset_dim,
            'keys': ['deviceid'],
            'columns': ['installdate'],
        },
    ]
    sensor_substitutions = [
        {
            'df': df_sensor,
            'keys': ['deviceid'],
            # separate list object from the one used for the feature names
            'columns': list(sensor_columns),
            'timestamp': 'event_timestamp',
        },
    ]

    group = TimeToFailureAssetGroupPipeline(
        asset_group_id=asset_group_id,
        model_pipeline=model_pipeline,
        asset_device_mappings=asset_device_mappings,
        data_substitution={
            '': asset_substitutions,
            iot_type: sensor_substitutions,
        },
    )
    df = group.execute()

    print(log_df_info(df, head=0))

    assert group.new_training
    # Equality comparison: passes for any value that compares equal to False.
    assert False == group.training  # noqa: E712
    assert 'TimeToFailureEstimatorSrom' in group.model_timestamp
    assert df.shape == (4113, 1)
    assert 'predicted_time_to_failure' in df.columns




# def test_smart_regression_with_failure_mode(asset_group_id='abcd', iot_type=None):
#     df_data_sensor = pd.DataFrame()
#     df_data_sensor['rcv_timestamp_utc'] = pd.date_range(end=pd.Timestamp('2020-01-05T10:31:40'), periods=2084, freq='10T', tz='UTC')
#     df_data_sensor['devicetype'] = iot_type
#     df_data_sensor['deviceid'] = 'abcd-1'


#     if iot_type is None:
#         import math
#         import random
#         iot_type = 'abcdsensor_%05d' % math.floor(random.random() * 10**5)

#     from numpy.random import default_rng
#     rg = default_rng(12345)

#     df_data_sensor['current'] = rg.integers(low=0, high=1000, size=len(df_data_sensor))
#     df_data_sensor['voltage'] = rg.integers(low=0, high=1000, size=len(df_data_sensor))

#     print(log_df_info(df_data_sensor, head=5))

    



#     df_data_asset = pd.DataFrame(data={
#         'datetime': [
#             pd.Timestamp('2019-12-25T08:00'),
#             pd.Timestamp('2019-12-31T08:00'),
#             pd.Timestamp('2019-12-31T08:00'),
#             pd.Timestamp('2020-01-03T08:00'),
#         ],
#         'problemcode': [
#             'PUMPS/STOPPED',
#             'PUMPS/STOPPED',
#             'PUMPS/BROKEN',
#             'PUMPS/BROKEN',
#         ],
#         'classcode': [
#             'PUMPS',
#             'PUMPS',
#             'PUMPS',
#             'PUMPS',
#         ]
#     })



#     df_data_asset['asset'] = 'abcd-1'
#     df_data_asset['site'] = 'BEDFORD'
#     df_data_asset['assetid'] = df_data_asset['asset'] + '-____-' + df_data_asset['site']
#     df_data_asset['faildate'] = df_data_asset['datetime']

#     print(log_df_info(df_data_asset, head=5))

#     # non-multi-class with failure_modes specified

#     # NOTE: the code below is commented out because it raises an exception from opt/app-root/lib/python3.6/site-packages/sklearn/pipeline.py

#     group = TimeToFailureAssetGroupPipeline(

#                 asset_group_id=asset_group_id,
#                 model_pipeline={
#                     'features': ['%s:current' % iot_type, '%s:voltage' % iot_type],
                    
#                     'predictions': ['predicted_time_to_failure'],
#                     'aggregation_methods':['min'],
#                     'smart_regression': True,
#                     'failure_mode': ['PUMPS/STOPPED']
#                 },
#                 asset_device_mappings={
#                     'abcd-1-____-BEDFORD': ['%s:abcd-1' % iot_type], 
#                 },
#                 data_substitution={
#                     '': [
#                         {
#                             'df': df_data_asset,
#                             'keys': ['assetid'],
#                             'columns': ['problemcode','classcode'],
#                             'timestamp': 'faildate',
#                         },
#                     ],
#                     iot_type: [
#                         {
#                             'df': df_data_sensor,
#                             'keys': ['deviceid'],
#                             'columns': [
#                                 'current',
#                                 'voltage',
#                             ],
#                             'timestamp': 'rcv_timestamp_utc',
#                             'rename_columns': {
#                                 'current': 'current',
#                                 'voltage': 'voltage',
#                             },
#                         },
#                     ],
#                 },
#             )
#     df = group.execute()

#     print(log_df_info(df, head=0))

#     #assert group.new_training
#     #assert False == group.training
#     #assert 'FailurePredictionEstimator' in group.model_timestamp and 'FailurePredictionRcaEstimator' in group.model_timestamp
#     #assert (1655, 2) == df.shape
#     #assert 'failure_probability_3d' in df.columns
#     #assert 'rca_path_3d' in df.columns
#     #assert group.pipeline_config['failure_modes'] == {'STOPPED': [1, True], 'BROKEN': [2, True], 'UNKNOWN': [3, False]}

    

if __name__ == '__main__':
    # Run every test in this module, in definition order, when the file is
    # executed directly.
    for _test_case in (
        test_prepare_execution_adding_training_features,
        test_time_to_failure_prediction_backtrack,
        test_time_to_failure_summary_config,
        test_time_to_failure_data_substitution,
    ):
        _test_case()
    

