# -*- coding: utf-8 -*-
# Licensed Materials - Property of IBM
# 5737-M66, 5900-AAA
# (C) Copyright IBM Corp. 2019, 2025 All Rights Reserved.
# US Government Users Restricted Rights - Use, duplication, or disclosure
# restricted by GSA ADP Schedule Contract with IBM Corp.

import pandas as pd
import numpy as np
#from datetime import timedelta
#import logging
import timeit
#from IPython.display import display, HTML, Image
#
#import json
#import os
#import srom.utils.tabulate as tabulate

from sklearn.ensemble import IsolationForest #, RandomForestClassifier
#from sklearn.mixture import GaussianMixture, BayesianGaussianMixture
from sklearn.svm import OneClassSVM
from sklearn.covariance import (EmpiricalCovariance, EllipticEnvelope, LedoitWolf, MinCovDet, OAS, ShrunkCovariance)
#from sklearn.neighbors import LocalOutlierFactor, KNeighborsClassifier, RadiusNeighborsClassifier
from sklearn.preprocessing import (StandardScaler, RobustScaler, MinMaxScaler, Normalizer)

#import srom
from srom.pipeline.anomaly_pipeline import AnomalyPipeline
from srom.pipeline.srom_param_grid import SROMParamGrid
from srom.anomaly_detection.generalized_anomaly_model import GeneralizedAnomalyModel
from srom.anomaly_detection.gaussian_graphical_anomaly_model import GaussianGraphicalModel
from srom.anomaly_detection.algorithms.anomaly_ensembler import AnomalyEnsembler
from srom.anomaly_detection.algorithms import NearestNeighborAnomalyModel, LOFNearestNeighborAnomalyModel
from srom.anomaly_detection.algorithms.pca_t2 import AnomalyPCA_T2
from srom.anomaly_detection.algorithms.pca_q import AnomalyPCA_Q
from srom.anomaly_detection.algorithms.gmm_outlier import GMMOutlier
from srom.anomaly_detection.algorithms.bayesian_gmm_outlier import BayesianGMMOutlier
from srom.anomaly_detection.algorithms.anomaly_robust_pca import AnomalyRobustPCA
from srom.anomaly_detection.algorithms.neural_network_nsa import NeuralNetworkNSA
from srom.anomaly_detection.algorithms.ggm_quic import GraphQUIC
from srom.anomaly_detection.algorithms.ggm_pgscps import GraphPgscps
from srom.anomaly_detection.algorithms.sample_svdd import SampleSVDD
from srom.anomaly_detection.algorithms.random_partition_forest import RandomPartitionForest
from srom.anomaly_detection.algorithms.extended_isolation_forest import ExtendedIsolationForest
from srom.anomaly_detection.algorithms.negative_sample_anomaly import NSA
from srom.utils.no_op import NoOp
from .estimator import SromEstimator

from enum import IntEnum, unique
@unique
class UnsupervisedADLearningAlgorithm(IntEnum):
    """Identifiers of the anomaly-detection learners selectable by enum.

    Members are listed alphabetically and numbered consecutively from 1;
    ``@unique`` guarantees that no two members share a value.
    """

    anomaly_ensembler = 1
    anomaly_pca_q = 2
    anomaly_pca_t2 = 3
    anomaly_robust_pca = 4
    bayesian_gmmoutlier = 5
    elliptic_envelope = 6
    empirical_covariance = 7
    extended_isolation_forest = 8
    gaussian_graphical_model = 9
    gaussian_graphical_model_graph_pgscps = 10
    gaussian_graphical_model_graph_quic = 11
    gmmoutlier = 12
    isolation_forest = 13
    ledoit_wolf = 14
    lof_nearest_neighbor_anomaly = 15
    min_cov_det = 16
    nearest_neighbor_anomaly = 17
    neural_network_nsa = 18
    nsa = 19
    oas = 20
    one_class_svm = 21
    random_partition_forest = 22
    sample_svdd = 23
    shrunk_covariance = 24

# NOTE: the triple-quoted block below is a bare string literal, not live code;
# GLOBAL_ESTIMATORS is never actually defined by this module. It lists, for each
# UnsupervisedADLearningAlgorithm member, the same kind of
# (estimator instance, score sign, scoring-method name) tuple that
# UnsupervisedAnomalyDetection.__get_stages builds on demand. The two have
# diverged: this table uses covariance_type='diag' for the GMM-based outlier
# models, whereas __get_stages defaults covariance_type to 'full'.
"""
GLOBAL_ESTIMATORS = {
UnsupervisedADLearningAlgorithm.anomaly_ensembler:(AnomalyEnsembler(predict_only=True), 1, "anomaly_score"),
UnsupervisedADLearningAlgorithm.anomaly_pca_q:(AnomalyPCA_Q(), 1, "anomaly_score"),
UnsupervisedADLearningAlgorithm.anomaly_pca_t2:(AnomalyPCA_T2(), 1, "anomaly_score"),
UnsupervisedADLearningAlgorithm.anomaly_robust_pca:(AnomalyRobustPCA(), 1, "anomaly_score"),
UnsupervisedADLearningAlgorithm.bayesian_gmmoutlier:(BayesianGMMOutlier(n_components=2,covariance_type='diag'), 1, "decision_function"),
UnsupervisedADLearningAlgorithm.elliptic_envelope:(EllipticEnvelope(), 1, "mahalanobis"),
UnsupervisedADLearningAlgorithm.empirical_covariance:(EmpiricalCovariance(), 1, "mahalanobis"),
UnsupervisedADLearningAlgorithm.extended_isolation_forest:(ExtendedIsolationForest(), 1, "anomaly_score"),
UnsupervisedADLearningAlgorithm.gaussian_graphical_model:(GaussianGraphicalModel(sliding_window_size=0, scale=False), 1, "predict"),
UnsupervisedADLearningAlgorithm.gaussian_graphical_model_graph_pgscps:(GaussianGraphicalModel(base_learner=GraphPgscps(),sliding_window_size=0, scale=True), 1, "predict"),
UnsupervisedADLearningAlgorithm.gaussian_graphical_model_graph_quic:(GaussianGraphicalModel(base_learner=GraphQUIC(),sliding_window_size=0, scale=False), 1, "predict"),
UnsupervisedADLearningAlgorithm.gmmoutlier:(GMMOutlier(n_components=2,covariance_type='diag'), 1, "decision_function"),
UnsupervisedADLearningAlgorithm.isolation_forest:(IsolationForest(), -1, 'decision_function'),
UnsupervisedADLearningAlgorithm.ledoit_wolf:(LedoitWolf(), 1, "mahalanobis"),
UnsupervisedADLearningAlgorithm.lof_nearest_neighbor_anomaly:(LOFNearestNeighborAnomalyModel(), 1, "anomaly_score"),
UnsupervisedADLearningAlgorithm.min_cov_det:(MinCovDet(), 1, "mahalanobis"),
UnsupervisedADLearningAlgorithm.nearest_neighbor_anomaly:(NearestNeighborAnomalyModel(), 1, "anomaly_score"),
UnsupervisedADLearningAlgorithm.neural_network_nsa:(NeuralNetworkNSA(scale=True, sample_ratio=25.0, sample_delta=0.05, batch_size=10, epochs=5, dropout=0.85, layer_width=150, n_hidden_layers=2,), -1, "anomaly_score"),
UnsupervisedADLearningAlgorithm.nsa:(NSA(scale=True, sample_ratio=25.0, sample_delta=0.05,), 1, "anomaly_score"),
UnsupervisedADLearningAlgorithm.oas:(OAS(), 1, "mahalanobis"),
UnsupervisedADLearningAlgorithm.one_class_svm:(OneClassSVM(), -1, "decision_function"),
UnsupervisedADLearningAlgorithm.random_partition_forest:(RandomPartitionForest(), -1, "anomaly_score"),
UnsupervisedADLearningAlgorithm.sample_svdd:(SampleSVDD(), -1, "anomaly_score"),
UnsupervisedADLearningAlgorithm.shrunk_covariance:(ShrunkCovariance(), 1, "mahalanobis")
}
"""

class UnsupervisedAnomalyDetection(SromEstimator):
    def __init__(self, features = None, targets = ['label'], predictions = ['anomaly_score', 'anomaly_threshold', 'anomaly_detected'], **kwargs):
        """
        Configure an unsupervised anomaly-detection estimator.

        Parameters
        ----------
        features : optional
            Forwarded unchanged to ``SromEstimator.__init__``. ``train_model``
            later reads ``self.features`` to select the training columns
            (presumably stored by the base class -- confirm there).
            The default is None.
        targets : list, optional
            Forwarded to ``SromEstimator.__init__``. The default is ['label'].
        predictions : list, optional
            Output column names, forwarded to ``SromEstimator.__init__``.
            Entry 1 is used as the name of the threshold column; when a third
            entry exists, ``process_prediction_result`` stores in it the
            comparison "column of entry 0 > column of entry 1".
            The default is ['anomaly_score', 'anomaly_threshold', 'anomaly_detected'].
        **kwargs
            Forwarded to ``SromEstimator.__init__`` and also kept as
            ``self.config``, which ``train_model`` passes on to ``train``.

        Returns
        -------
        None.

        """
        # The list defaults in the signature are created once and would
        # otherwise be shared by every instance; hand the base class a private
        # copy so that mutating one instance cannot leak into another.
        if isinstance(targets, list):
            targets = list(targets)
        if isinstance(predictions, list):
            predictions = list(predictions)
        super().__init__(features=features, targets=targets, predictions=predictions, **kwargs)
        # Name of the column that receives the threshold value
        # (self.predictions is assumed to be set by the base class).
        self.anomaly_threshold = self.predictions[1]
        self.config = kwargs
        self.hyper_param_grid = SROMParamGrid(gridtype='anomaly_detection_fine_grid')

    def train(self, train_x, **kwargs):
        """
        Fit an anomaly pipeline on ``train_x`` and derive its threshold.

        Parameters
        ----------
        train_x :
            Training data, handed unchanged to the selected fit routine.
        **kwargs
            Training options, forwarded unchanged to the fit routine. The key
            ``threshold_computation_strategy`` (default 'default') selects the
            routine: 'contamination' uses
            ``__fit_using_contamination_threshold``; any other value uses
            ``__fit_using_non_contamination_threshold``.

        Returns
        -------
        tuple
            ``(pipeline, best_execution_res, execution_res, cutoff,
            anomaly_threshold, class_distribution)`` exactly as produced by
            the selected fit routine.
        """
        log_origin = 'UnsupervisedAnomalyDetection::train - '
        threshold_computation_strategy = kwargs.get('threshold_computation_strategy', 'default')
        # The next three values are only logged here; the fit routines read
        # kwargs themselves. The qfunction default mirrors the 0.9 that
        # __fit_using_non_contamination_threshold applies, so the log reports
        # the value that is really used.
        qfunction_threshold = kwargs.get('qfunction_threshold', 0.9)
        medianabsolutedev_threshold = kwargs.get('medianabsolutedev_threshold', 2.5)
        contamination = kwargs.get('contamination', None)
        # Lazy %-formatting: no TypeError when the strategy is not a str.
        self.logger.info('%sThreshold Computation Strategy = %s'
                         ', qfunction threhsold = %s'
                         ', median absolute deviation threshold = %s'
                         ', contamination = %s',
                         log_origin, threshold_computation_strategy, qfunction_threshold,
                         medianabsolutedev_threshold, contamination)

        if threshold_computation_strategy == 'contamination':
            fit_result = UnsupervisedAnomalyDetection.__fit_using_contamination_threshold(
                train_x = train_x, hyper_param_grid = self.hyper_param_grid, **kwargs)
        else:
            fit_result = UnsupervisedAnomalyDetection.__fit_using_non_contamination_threshold(
                train_x, self.hyper_param_grid, **kwargs)
        pipeline, best_execution_res, execution_res, cutoff, anomaly_threshold, class_distribution = fit_result

        self.logger.info('%sAnomaly Threshold = %s', log_origin, anomaly_threshold)
        self.logger.info('%sclass distribution = %s', log_origin, class_distribution)
        return pipeline, best_execution_res, execution_res, cutoff, anomaly_threshold, class_distribution

    def evaluate(self, pipeline, test_x, test_y, **kwargs):
        scoring_method = kwargs.get('scoring_method', 'f1')
        pipeline.set_scoring(scoring = scoring_method)
        return pipeline.predict(test_x), pipeline.predict_proba(test_x), pipeline.score(test_x, test_y)

    def create_pipeline(self):
        raise NotImplementedError('This method is not used for Unsupervised Learning path')

    def get_prediction_result_value_index(self):
        return (1, 0)

    def process_prediction_result(self, df, prediction_result, model):
        """
        Add the threshold column and, if configured, the comparison column to ``df``.

        Parameters
        ----------
        df :
            DataFrame being annotated; modified in place and returned.
        prediction_result :
            Result handed to the base-class implementation. When it is None
            the threshold column is filled with None.
        model :
            Object exposing ``get_best_thresholds()``; its first entry
            (unwrapped once more if it is itself a non-empty list/ndarray)
            becomes the threshold value.

        Returns
        -------
        The same ``df``, with ``df[self.anomaly_threshold]`` set and, when
        ``self.predictions`` has more than two entries,
        ``df[self.predictions[2]]`` set to
        ``df[self.predictions[0]] > df[self.predictions[1]]``.
        """
        # The base implementation is called for its effect on df; its return
        # value is deliberately not used (unchanged from the original code).
        super().process_prediction_result(df, prediction_result, model)

        if prediction_result is not None:
            best_threshold_value = model.get_best_thresholds()[0]
            # isinstance() already excludes None. The previous "!= None" test
            # was element-wise on ndarrays and raised ValueError for arrays
            # with more than one element.
            if isinstance(best_threshold_value, (list, np.ndarray)) and len(best_threshold_value) > 0:
                best_threshold_value = best_threshold_value[0]
            self.logger.debug('best_thresholds=%s', best_threshold_value)
            df[self.anomaly_threshold] = best_threshold_value
        else:
            df[self.anomaly_threshold] = None
        if len(self.predictions) > 2:
            df[self.predictions[2]] = df[self.predictions[0]] > df[self.predictions[1]]

        return df

    def train_model(self, df):
        self.pipeline, self.best_execution_res, self.execution_res, self.cutoff, \
            self.anomaly_threshold_value, self.class_distribution = self.train(df[self.features].values, **self.config)
        return self.pipeline



    # ###################### PRIVATE METHODS #############################

    # pylint: disable=not-callable
    @classmethod
    def __get_stages(cls, classifier_name, transformer_instance = None, **kwargs):
        """
        Resolve an algorithm enum member into a transformer and an estimator spec.

        Parameters
        ----------
        classifier_name : UnsupervisedADLearningAlgorithm
            Algorithm to instantiate.
        transformer_instance : optional
            Transformer to return alongside the estimator; a fresh ``NoOp()``
            is used when None.
        **kwargs
            Only ``n_components`` (default 2) and ``covariance_type``
            (default 'full') are read, and only for the two GMM-based
            outlier models. All other keys are ignored.

        Returns
        -------
        tuple
            ``(transformer_instance, estimator)`` where ``estimator`` is a
            ``(learner instance, score sign, scoring-method name)`` triple.

        Raises
        ------
        TypeError
            If ``classifier_name`` is not an ``UnsupervisedADLearningAlgorithm``.
        """
        if transformer_instance is None:
            transformer_instance = NoOp()
        if not isinstance(classifier_name, UnsupervisedADLearningAlgorithm):
            raise TypeError("Only enumerated types of LearningAlgorithm are supported")

        algo = UnsupervisedADLearningAlgorithm
        n_components = kwargs.get('n_components', 2)
        covariance_type = kwargs.get('covariance_type', 'full')

        # Factories rather than instances: only the requested learner is built.
        builders = {
            algo.anomaly_ensembler:
                lambda: (AnomalyEnsembler(predict_only=True), 1, "anomaly_score"),
            algo.anomaly_pca_q:
                lambda: (AnomalyPCA_Q(), 1, "anomaly_score"),
            algo.anomaly_pca_t2:
                lambda: (AnomalyPCA_T2(), 1, "anomaly_score"),
            algo.anomaly_robust_pca:
                lambda: (AnomalyRobustPCA(), 1, "anomaly_score"),
            algo.bayesian_gmmoutlier:
                lambda: (BayesianGMMOutlier(n_components=n_components,
                                            covariance_type=covariance_type), 1, "decision_function"),
            algo.elliptic_envelope:
                lambda: (EllipticEnvelope(), 1, "mahalanobis"),
            algo.empirical_covariance:
                lambda: (EmpiricalCovariance(), 1, "mahalanobis"),
            algo.extended_isolation_forest:
                lambda: (ExtendedIsolationForest(), 1, "anomaly_score"),
            algo.gaussian_graphical_model:
                lambda: (GaussianGraphicalModel(sliding_window_size=0, scale=False), 1, "predict"),
            algo.gaussian_graphical_model_graph_pgscps:
                lambda: (GaussianGraphicalModel(base_learner=GraphPgscps(),
                                                sliding_window_size=0, scale=True), 1, "predict"),
            algo.gaussian_graphical_model_graph_quic:
                lambda: (GaussianGraphicalModel(base_learner=GraphQUIC(),
                                                sliding_window_size=0, scale=False), 1, "predict"),
            algo.gmmoutlier:
                lambda: (GMMOutlier(n_components=n_components,
                                    covariance_type=covariance_type), 1, "decision_function"),
            algo.isolation_forest:
                lambda: (IsolationForest(), -1, 'decision_function'),
            algo.ledoit_wolf:
                lambda: (LedoitWolf(), 1, "mahalanobis"),
            algo.lof_nearest_neighbor_anomaly:
                lambda: (LOFNearestNeighborAnomalyModel(), 1, "anomaly_score"),
            algo.min_cov_det:
                lambda: (MinCovDet(), 1, "mahalanobis"),
            algo.nearest_neighbor_anomaly:
                lambda: (NearestNeighborAnomalyModel(), 1, "anomaly_score"),
            algo.neural_network_nsa:
                lambda: (NeuralNetworkNSA(scale=True, sample_ratio=25.0, sample_delta=0.05,
                                          batch_size=10, epochs=5, dropout=0.85,
                                          layer_width=150, n_hidden_layers=2), -1, "anomaly_score"),
            algo.nsa:
                lambda: (NSA(scale=True, sample_ratio=25.0, sample_delta=0.05), 1, "anomaly_score"),
            algo.oas:
                lambda: (OAS(), 1, "mahalanobis"),
            algo.one_class_svm:
                lambda: (OneClassSVM(), -1, "decision_function"),
            algo.random_partition_forest:
                lambda: (RandomPartitionForest(), -1, "anomaly_score"),
            algo.sample_svdd:
                lambda: (SampleSVDD(), -1, "anomaly_score"),
            algo.shrunk_covariance:
                lambda: (ShrunkCovariance(), 1, "mahalanobis"),
        }

        # Every current member has an entry; None is kept as the fallback so a
        # member added to the enum without a builder behaves as it did before.
        builder = builders.get(classifier_name)
        estimator = builder() if builder is not None else None

        return transformer_instance, estimator

    @classmethod
    def get_score_ranges_by_labels(cls, labels, scores):
        #scores_range = (np.min(scores),np.max(scores))
        metrics_df = pd.DataFrame({'labels':[val[0] for val in labels],'scores':[val[0] for val in scores]})
        class_1_scores = metrics_df.loc[metrics_df[metrics_df['labels'] == 1].index]['scores'].values
        class_1_scores_range = (np.NaN, np.NaN)
        if len(class_1_scores) > 0:
            class_1_scores_range = (np.min(class_1_scores),np.max(class_1_scores))
        class_minus_1_scores = metrics_df.loc[metrics_df[metrics_df['labels'] == -1].index]['scores'].values
        class_minus_1_scores_range = (np.NaN, np.NaN)
        if len(class_minus_1_scores) > 0:
            class_minus_1_scores_range = (np.min(class_minus_1_scores),np.max(class_minus_1_scores))
        return class_minus_1_scores_range, class_1_scores_range

    @classmethod
    def __fit_using_contamination_threshold(cls, train_x, hyper_param_grid = None, **kwargs):
        """
        Fit a single-estimator ``AnomalyPipeline`` using the 'contamination' threshold method.

        Parameters
        ----------
        train_x :
            Training data passed to ``pipeline.execute`` and ``pipeline.predict``.
        hyper_param_grid : optional
            Accepted for signature parity with the non-contamination routine;
            not used here.
        **kwargs
            ``contamination`` (default 0.1) is given to ``AnomalyPipeline``.
            ``override_training_stages`` may be ``[algorithm]`` or
            ``[scaler, algorithm, ...]``; without it IsolationForest with no
            scaling is used. ``execution_type`` (default
            "spark_node_complete_search") is passed as ``exectype``. The whole
            kwargs dict is also forwarded to ``__get_stages``.

        Returns
        -------
        tuple
            ``(pipeline, best_execution_res, execution_res, contamination,
            anomaly_threshold, class_label_distribution)``;
            ``execution_res`` is always None for this routine.

        Raises
        ------
        TypeError
            If the selected algorithm is not an ``UnsupervisedADLearningAlgorithm``.
        """
        log_origin = 'UnsupervisedAnomalyDetection::__fit_using_contamination_threshold - '
        contamination = kwargs.get('contamination', 0.1)
        print((log_origin+'contamination value = '+ str(contamination)))
        # This routine evaluates a single pipeline, so there is no per-pipeline
        # result list to report.
        execution_res = None

        learning_algorithm = UnsupervisedADLearningAlgorithm.isolation_forest # default
        scaling = None
        stages = kwargs.get('override_training_stages', None)
        if stages is not None and len(stages) > 0:
            if len(stages) == 1:
                learning_algorithm = stages[0]
            else:
                scaling = stages[0]
                learning_algorithm = stages[1]
        print((log_origin+'Scaling = '+ str(scaling) + ', Learning Algorithm = '+ str(learning_algorithm)))
        if not isinstance(learning_algorithm, UnsupervisedADLearningAlgorithm):
            raise TypeError("Only enumerated types of LearningAlgorithm are supported")

        transformer, estimator = UnsupervisedAnomalyDetection.__get_stages(
            classifier_name = learning_algorithm, transformer_instance = scaling, **kwargs)
        base_learner, score_sign, predict_function = estimator

        pipeline = AnomalyPipeline(anomaly_threshold_method='contamination', contamination = contamination)
        gam = GeneralizedAnomalyModel(
            base_learner = base_learner,
            fit_function = 'fit',
            predict_function = predict_function,
            score_sign = score_sign
        )
        secondary_string = "_score_pos" if score_sign == 1 else "_score_neg"
        stages = [
            [
                (transformer.__class__.__name__, transformer)
            ],
            [
                (base_learner.__class__.__name__+secondary_string, gam)
            ]
        ]
        pipeline.set_stages(stages)

        start = timeit.default_timer()
        # Return value intentionally unused; results are read from the pipeline.
        pipeline.execute(train_x, validX=None, validy=None,
                         exectype=kwargs.get('execution_type', "spark_node_complete_search"),
                         random_state=42)
        stop = timeit.default_timer()
        print((log_origin+'Pipeline execution took ' + str(stop-start) + ' seconds '))

        best_execution_res = [[pipeline.best_estimator, pipeline.best_score]]
        anomaly_threshold = pipeline.get_best_thresholds()
        print(log_origin + ' Anomaly Threshold as received from the pipeline is '+str(anomaly_threshold))
        # isinstance() already excludes None; "!= None" on an ndarray is
        # element-wise and raised ValueError for multi-element arrays.
        if isinstance(anomaly_threshold, (list, np.ndarray)) and len(anomaly_threshold) > 0:
            anomaly_threshold = anomaly_threshold[0]

        class_labels = pipeline.predict(train_x)
        label_counts = np.unique(class_labels, return_counts = True)
        print((log_origin)+str(label_counts))
        # One entry per label actually predicted. Indexing two labels
        # unconditionally raised IndexError when only one class was predicted.
        class_label_distribution = {
            ('class '+str(label)): count
            for label, count in zip(label_counts[0], label_counts[1])
        }

        return pipeline, best_execution_res, execution_res, contamination, anomaly_threshold, class_label_distribution

    @classmethod
    def __fit_using_non_contamination_threshold(cls, train_x, hyper_params, **kwargs):
        """
        Search scaler/estimator combinations with ``AnomalyPipeline`` and derive a threshold.

        Two stages are registered on the pipeline: five scaling options
        (no-op, standard, robust, min-max, normalizer) and seven
        ``GeneralizedAnomalyModel`` wrappers (IsolationForest plus six
        covariance estimators scored through ``mahalanobis``).

        Parameters
        ----------
        train_x :
            Training data used for ``execute``, ``fit``, ``predict`` and
            ``predict_proba``.
        hyper_params :
            Passed to ``pipeline.execute`` as ``param_grid``.
        **kwargs
            ``threshold_computation_strategy`` (default 'default'):
            'qfunction' and 'medianabsolutedev' configure the pipeline with
            ``qfunction_threshold`` (default 0.9) or
            ``medianabsolutedev_threshold`` (default 2.5) respectively; any
            other value uses a default-constructed ``AnomalyPipeline``.
            ``execution_type`` (default "spark_node_random_search") is passed
            as ``exectype``.

        Returns
        -------
        tuple
            ``(pipeline, best_execution_res, execution_res, cutoff,
            anomaly_threshold, class_label_distribution)``. ``cutoff`` is the
            pipeline's own best threshold (first element if it is a list).
            ``anomaly_threshold`` is the largest ``predict_proba`` score among
            training samples labelled 1 (NaN if there are none), and is also
            written back with ``pipeline.set_best_thresholds``.
        """
        log_origin = 'UnsupervisedAnomalyDetection::__fit_using_non_contamination_threshold - '
        threshold_computation_strategy = kwargs.get('threshold_computation_strategy', 'default')
        qfunction_threshold = kwargs.get('qfunction_threshold', 0.9)
        medianabsolutedev_threshold = kwargs.get('medianabsolutedev_threshold', 2.5)

        # Build exactly one pipeline for the requested strategy.
        if threshold_computation_strategy == 'qfunction':
            pipeline = AnomalyPipeline(anomaly_threshold_method=threshold_computation_strategy,
                                       qfunction_threshold = qfunction_threshold)
            print('Using qfunction as the threshold computation strategy')
        elif threshold_computation_strategy == 'medianabsolutedev':
            pipeline = AnomalyPipeline(anomaly_threshold_method=threshold_computation_strategy,
                                       medianabsolutedev_threshold = medianabsolutedev_threshold)
            print('Using medianabsolutedev as the threshold computation strategy')
        else:
            pipeline = AnomalyPipeline()
            if threshold_computation_strategy == 'default':
                print('Using default threshold computation strategy')

        def _mahalanobis_model(base_learner):
            # Covariance estimators are all wrapped the same way.
            return GeneralizedAnomalyModel(base_learner=base_learner, fit_function='fit',
                                           predict_function='mahalanobis', score_sign=1)

        # Initialize the different anomaly learners.
        gam_if = GeneralizedAnomalyModel(base_learner=IsolationForest(),
                                         predict_function='decision_function', score_sign=-1)
        gam_empirical = _mahalanobis_model(EmpiricalCovariance())
        gam_elliptic = _mahalanobis_model(EllipticEnvelope())
        gam_ledoitwolf = _mahalanobis_model(LedoitWolf())
        gam_mincovdet = _mahalanobis_model(MinCovDet())
        gam_oas = _mahalanobis_model(OAS())
        gam_shrunk_covariance = _mahalanobis_model(ShrunkCovariance())

        # Stage 1: scaling options; stage 2: anomaly learners.
        stages = [
            [
                ('skipscaling', NoOp()), ('standardscaler', StandardScaler()),
                ('robustscaler', RobustScaler()), ('minmaxscaling', MinMaxScaler()),
                ('normalizer', Normalizer())
            ],
            [
                ('isolationforest', gam_if),
                ('empiricalcovariance', gam_empirical), ('ellipticenvelope', gam_elliptic),
                ('ledoitwolf', gam_ledoitwolf), ('mincovdet', gam_mincovdet),
                ('oas', gam_oas), ('shrunkcovariance', gam_shrunk_covariance)
            ]
        ]
        pipeline.set_stages(stages)

        start = timeit.default_timer()
        # Execute the pipeline with the supplied (fine) parameter grid.
        pipeline_output = pipeline.execute(trainX=train_x, validX=None, validy=None,
                                           verbosity='low', param_grid = hyper_params,
                                           exectype = kwargs.get('execution_type', "spark_node_random_search"),
                                           num_option_per_pipeline=1, max_eval_time_minute=1)
        stop = timeit.default_timer()
        print(log_origin+'Pipeline execution took ' + str(stop-start) + ' seconds ')

        best_execution_res = [[pipeline.best_estimator, pipeline.best_score]]

        # Pair every explored estimator with its score. Index access is kept so
        # a too-short score list still fails loudly instead of being truncated.
        execution_res = [
            [item, pipeline_output.best_scores[position]]
            for position, item in enumerate(pipeline_output.best_estimators)
        ]

        pipeline.fit(train_x)

        cutoff = pipeline.get_best_thresholds()
        # "is not None" instead of "!= None": the latter is element-wise on
        # ndarrays and raised ValueError for multi-element arrays.
        if cutoff is not None and isinstance(cutoff, list) and len(cutoff) > 0:
            cutoff = cutoff[0]

        class_labels = pipeline.predict(train_x)
        label_counts = np.unique(class_labels, return_counts = True)
        print((log_origin)+str(label_counts))
        # One entry per label actually predicted (works for a single class too).
        class_label_distribution = {
            ('class '+str(label)): count
            for label, count in zip(label_counts[0], label_counts[1])
        }

        scores = pipeline.predict_proba(train_x)
        class_minus_1_range, class_1_range = UnsupervisedAnomalyDetection.get_score_ranges_by_labels(
            class_labels.tolist(), scores.tolist())
        # Upper end of the score range of samples labelled 1; always a scalar,
        # so no list/ndarray unwrapping is needed.
        anomaly_threshold = class_1_range[1]
        pipeline.set_best_thresholds(np.array([anomaly_threshold]))
        return pipeline, best_execution_res, execution_res, cutoff, anomaly_threshold, class_label_distribution

