# Licensed Materials - Property of IBM
# 5737-M66, 5900-AAA, 5900-AMG
# (C) Copyright IBM Corp. 2019, 2025 All Rights Reserved.
# US Government Users Restricted Rights - Use, duplication, or disclosure
# restricted by GSA ADP Schedule Contract with IBM Corp.

import numpy as np
import pandas as pd
import pytest

import pmlib




from pmlib.failure_prediction import (FailurePredictionAssetGroupPipeline, FailurePredictionEstimator, FailurePredictionRcaEstimator,
                                      FailurePredictionEstimatorFeatureExtraction, MulticlassFailurePredictionEstimator, MulticlassFailurePredictionRcaEstimator)

class myFailurePredictionAssetGroupPipeline(FailurePredictionAssetGroupPipeline):
    """Asset group pipeline that chains a failure prediction estimator with an RCA estimator."""

    def prepare_execute(self, pipeline, model_config):
        """Add a failure prediction estimator and an RCA estimator to `pipeline`.

        This class overrides this method to use two estimators, selected by the
        ``multiclass`` entry of `model_config`:

        * binary (default): `pmlib.failure_prediction.FailurePredictionEstimator` and
          `pmlib.failure_prediction.FailurePredictionRcaEstimator`
        * multiclass: `pmlib.failure_prediction.MulticlassFailurePredictionEstimator` and
          `pmlib.failure_prediction.MulticlassFailurePredictionRcaEstimator`

        One transformer is added to both estimators as a training-only preprocessor:

        * `pmlib.failure_prediction.FailurePredictionEstimatorFeatureExtraction`

        It is used to extract asset failure labels.

        Note that the two estimators are set up as separate sequential stages on the
        pipeline (each one is added with its own ``add_stage`` call).

        Each estimator is created with a narrowed ``model_config['predictions']``:

        * binary: the first entry goes to the failure estimator, the second to the RCA estimator
        * multiclass: every entry but the last goes to the failure estimator, the last to the
          RCA estimator

        The original ``predictions`` list is put back before this method returns, including
        when building a stage raises.

        NOTE: no `pmlib.transformer.SimpleSummaryStatistics` stage is added here; the
        summary-statistics feature generation that used to precede the estimators is disabled.

        Args:
            pipeline: the pipeline being assembled; only ``add_stage`` is called on it.
            model_config: dict of model settings. Must contain ``predictions``,
                ``features_for_training`` and ``prediction_window_size``. It is passed as
                keyword arguments to both estimators. ``failure_modes`` is set to ``{}``
                in place when it is missing or None.

        See `pmlib.pipeline.AssetGroupPipeline.prepare_execute`.
        """

        # because we have 2 models each using one prediction output name, we need to tweak the
        # prediction parameter accordingly before creating each estimator. first preserve the
        # original list
        predictions = model_config['predictions']

        # get the multiclass flag. The equality test is kept on purpose: only values equal to
        # True enable multiclass mode, other truthy values (e.g. a non-empty string) do not.
        multiclass = model_config.get('multiclass', False) == True  # noqa: E712

        # default the failure modes to an empty mapping
        if model_config.get('failure_modes') is None:
            model_config['failure_modes'] = {}

        # any other entry of model_config (e.g. 'smartclassification') is not interpreted here;
        # it simply travels to the estimators through **model_config

        def new_label_extractor():
            # a fresh training-only preprocessor instance per estimator
            return FailurePredictionEstimatorFeatureExtraction(
                feature=model_config['features_for_training'][0],
                prediction_window_size=model_config['prediction_window_size'],
                multiclass=multiclass,
                failure_modes=model_config['failure_modes'])

        # (estimator class, slice of the prediction outputs it owns), in pipeline order:
        # 1st the failure prediction estimator, 2nd the RCA estimator
        if multiclass:
            stage_specs = (
                (MulticlassFailurePredictionEstimator, slice(0, -1)),
                (MulticlassFailurePredictionRcaEstimator, slice(-1, None)),
            )
        else:
            stage_specs = (
                (FailurePredictionEstimator, slice(0, 1)),
                (FailurePredictionRcaEstimator, slice(1, 2)),
            )

        try:
            for estimator_class, output_slice in stage_specs:
                model_config['predictions'] = predictions[output_slice]
                estimator = estimator_class(**model_config)

                # NOTE must add estimator to the pipeline first before adding its training
                # preprocessors
                pipeline.add_stage(estimator)
                estimator.add_training_preprocessor(new_label_extractor())
        finally:
            # restore the original prediction output list, even if a stage failed to build,
            # so the caller's model_config is never left with a truncated list
            model_config['predictions'] = predictions


def test(asset_group_id, iot_type, deviceid_prefix, assetid_prefix, siteid='BEDFORD'):
    """Train the custom failure prediction pipeline on daily pump sensor features.

    Builds a `myFailurePredictionAssetGroupPipeline` whose training stages are overridden so
    that every stage except the final one is a `NoOp` and the final one is a
    `RandomForestClassifier`, prints the stage definitions, then executes the pipeline over
    the period 2017-07-01 to 2019-06-30.

    Args:
        asset_group_id: currently ignored; it is overwritten with '1027' inside the function.
        iot_type: unused.
        deviceid_prefix: unused.
        assetid_prefix: unused.
        siteid: unused.

    Returns:
        None. The result of `execute` is discarded.
    """
    # only this function needs these third-party packages, so they are imported locally
    from srom.utils.no_op import NoOp
    from sklearn.ensemble import RandomForestClassifier

    # NOTE(review): the caller-supplied asset_group_id is overridden here. The feature names
    # below are all PUMP_SENSOR_DAILY attributes, so this presumably pins the run to the pump
    # asset group - confirm before honouring the argument instead.
    asset_group_id = '1027'
    pump_sensor = 'PUMP_SENSOR_DAILY'

    random_forest = RandomForestClassifier(
        max_depth=15,
        max_features='sqrt',
        min_samples_leaf=2,
        min_samples_split=3,
        n_estimators=200,
        n_jobs=-1,
    )

    # one list of (name, step) options per training stage, in stage order
    my_stages = [
        [('skip_transformation', NoOp())],              # feature transformation
        [('skip_scaling', NoOp())],                     # feature scaling
        [('skip_selection', NoOp())],                   # feature preprocessing
        [('skip_generation', NoOp())],                  # estimator feature generation
        [('randomforestclassifier', random_forest)],    # estimator
    ]

    for stage in my_stages:
        print(stage)

    # attribute names of the pump sensor used as model inputs
    feature_names = (
        'current_max_scaled',
        'current_max_scaled_7d_max',
        'current_std_scaled',
        'daily_starts_scaled',
        'rainfall_14d',
        'rainfall_14d_max',
        'rainfall_30d',
        'rainfall_30d_max',
        'runtime_max_scaled_7d_max',
        'storage_level_max_scaled',
        'storage_level_max_scaled_7d_max',
        'storage_level_min_scaled',
        'storage_level_min_scaled_7d_min',
        'storage_level_std_scaled',
    )

    fp_pipeline = myFailurePredictionAssetGroupPipeline(
        asset_group_id=asset_group_id,
        model_pipeline={
            "features": [pump_sensor + ':' + name for name in feature_names],
            "features_for_training": [":faildate"],                       # features used for labelling data
            "predictions": ["failure_probability_30d", "rca_path_30d"],   # model outputs
            "aggregation_methods": ['mean'],                              # feature engineering, options: "mean", "max", "min", "median", "std", "sum", "count"
            "prediction_window_size": "30d",                              # failure prediction window, e.g.: 4h, 15d, 30d, 60d, 90d
            "override_training_stages": my_stages,
        },
    )

    # training window only; a later 2019-07-01 .. 2019-12-31 window is not executed here
    train_start = '2017-07-01'
    train_end = '2019-06-30'

    fp_pipeline.execute(start_ts=train_start, end_ts=train_end)

if __name__ == '__main__':
    # Manual entry point: run the training driver once when this file is executed as a script.
    test(
        asset_group_id='1004',
        iot_type='NodeMCU',
        deviceid_prefix='Brake1205',
        assetid_prefix='BRAKE1205',
        siteid='BEDFORD',
    )

