# Licensed Materials - Property of IBM
# 5737-M66, 5900-AAA
# (C) Copyright IBM Corp. 2019, 2025 All Rights Reserved.
# US Government Users Restricted Rights - Use, duplication, or disclosure
# restricted by GSA ADP Schedule Contract with IBM Corp.

import json
import os

import numpy as np
import pandas as pd
import pytest

from .. import api
from ..transformer import SimpleSummaryStatistics
from ..util import log_df_info


def test_2d_summary_statistics(mocker):
    """Check min/max/mean over a two-day rolling window on hourly data.

    Builds 100 hourly rows (values 0..99) for the single entity 'A1', runs
    SimpleSummaryStatistics on them and verifies that the first 48 rows are
    missing from the result and that the first and last remaining rows carry
    the expected aggregates.
    """
    features = ['A']
    aggs = ['min', 'max', 'mean']
    rollsize = '2d'
    num_rows = 100

    trans = SimpleSummaryStatistics(features=features, aggregation_methods=aggs, rolling_window_size=rollsize)
    trans._entity_type = mocker.Mock(_df_index_entity_id='id', _timestamp='timestamp')

    df = pd.DataFrame(data=[[i] for i in range(num_rows)], columns=features)
    df['id'] = 'A1'
    # '1h' replaces the 'H' alias that pandas deprecated in 2.2; the generated range is the same.
    df['timestamp'] = pd.date_range(start='2019-01-01 07:10:00', periods=len(df), freq='1h')
    df = df.set_index(['id', 'timestamp'])

    df_trans = trans.execute(df=df)

    print(log_df_info(df_trans, head=-1))

    # The input frame keeps all its rows; the output lacks the first 2 days (2 * 24 hourly rows).
    assert len(df) == num_rows
    assert len(df_trans) == (num_rows - 2 * 24)

    first_row = df_trans.iloc[0]
    last_row = df_trans.iloc[-1]

    # First remaining row: the initial rolling window has been cut off.
    assert df_trans.index[0][1] == pd.Timestamp('2019-01-03 07:10:00')
    assert first_row['A'] == 48
    # Expected min is 1, not 0: the row exactly one window length earlier is not part of the window.
    for agg, expected in (('min', 1), ('max', 48), ('mean', 24.5)):
        assert first_row['A__%s__%s' % (agg, rollsize)] == expected

    # Last row of the result.
    assert df_trans.index[-1][1] == pd.Timestamp('2019-01-05 10:10:00')
    assert last_row['A'] == 99
    for agg, expected in (('min', 52), ('max', 99), ('mean', 75.5)):
        assert last_row['A__%s__%s' % (agg, rollsize)] == expected


def test_6h_summary_statistics(mocker):
    """Check min/max/mean over a six-hour rolling window on hourly data.

    Builds 100 hourly rows (values 0..99) for the single entity 'A1', runs
    SimpleSummaryStatistics on them and verifies that the first 6 rows are
    missing from the result and that the first and last remaining rows carry
    the expected aggregates.
    """
    features = ['A']
    aggs = ['min', 'max', 'mean']
    rollsize = '6h'
    num_rows = 100

    trans = SimpleSummaryStatistics(features=features, aggregation_methods=aggs, rolling_window_size=rollsize)
    trans._entity_type = mocker.Mock(_df_index_entity_id='id', _timestamp='timestamp')

    df = pd.DataFrame(data=[[i] for i in range(num_rows)], columns=features)
    df['id'] = 'A1'
    # '1h' replaces the 'H' alias that pandas deprecated in 2.2; the generated range is the same.
    df['timestamp'] = pd.date_range(start='2019-01-01 07:10:37', periods=len(df), freq='1h')
    df = df.set_index(['id', 'timestamp'])

    df_trans = trans.execute(df=df)

    print(log_df_info(df_trans, head=-1))

    # The input frame keeps all its rows; the output lacks the first 6 hours (6 hourly rows).
    assert len(df) == num_rows
    assert len(df_trans) == (num_rows - 6)

    first_row = df_trans.iloc[0]
    last_row = df_trans.iloc[-1]

    # First remaining row: the initial rolling window has been cut off.
    assert df_trans.index[0][1] == pd.Timestamp('2019-01-01 13:10:37')
    assert first_row['A'] == 6
    # Expected min is 1, not 0: the row exactly one window length earlier is not part of the window.
    for agg, expected in (('min', 1), ('max', 6), ('mean', 3.5)):
        assert first_row['A__%s__%s' % (agg, rollsize)] == expected

    # Last row of the result.
    assert df_trans.index[-1][1] == pd.Timestamp('2019-01-05 10:10:37')
    assert last_row['A'] == 99
    for agg, expected in (('min', 94), ('max', 99), ('mean', 96.5)):
        assert last_row['A__%s__%s' % (agg, rollsize)] == expected


def test_5min_summary_statistics(mocker):
    """Check min/max/mean over a five-minute rolling window on minutely data.

    Builds 100 rows one minute apart (values 0..99) for the single entity
    'A1', runs SimpleSummaryStatistics on them and verifies that the first 5
    rows are missing from the result and that the first and last remaining
    rows carry the expected aggregates.
    """
    features = ['A']
    aggs = ['min', 'max', 'mean']
    # NOTE(review): '5T' is a legacy pandas alias, kept here because it is part of the asserted
    # column names and is interpreted by SimpleSummaryStatistics - confirm before changing it.
    rollsize = '5T'
    num_rows = 100

    trans = SimpleSummaryStatistics(features=features, aggregation_methods=aggs, rolling_window_size=rollsize)
    trans._entity_type = mocker.Mock(_df_index_entity_id='id', _timestamp='timestamp')

    df = pd.DataFrame(data=[[i] for i in range(num_rows)], columns=features)
    df['id'] = 'A1'
    # '1min' replaces the 'T' alias that pandas deprecated in 2.2; the generated range is the same.
    df['timestamp'] = pd.date_range(start='2019-01-01 07:10:37', periods=len(df), freq='1min')
    df = df.set_index(['id', 'timestamp'])

    df_trans = trans.execute(df=df)

    print(log_df_info(df_trans, head=-1))

    # The input frame keeps all its rows; the output lacks the first 5 minutes (5 minutely rows).
    assert len(df) == num_rows
    assert len(df_trans) == (num_rows - 5)

    first_row = df_trans.iloc[0]
    last_row = df_trans.iloc[-1]

    # First remaining row: the initial rolling window has been cut off.
    assert df_trans.index[0][1] == pd.Timestamp('2019-01-01 07:15:37')
    assert first_row['A'] == 5
    # Expected min is 1, not 0: the row exactly one window length earlier is not part of the window.
    for agg, expected in (('min', 1), ('max', 5), ('mean', 3)):
        assert first_row['A__%s__%s' % (agg, rollsize)] == expected

    # Last row of the result.
    assert df_trans.index[-1][1] == pd.Timestamp('2019-01-01 08:49:37')
    assert last_row['A'] == 99
    for agg, expected in (('min', 95), ('max', 99), ('mean', 97)):
        assert last_row['A__%s__%s' % (agg, rollsize)] == expected


def test_15s_summary_statistics(mocker):
    """Check min/max/mean over a five-second rolling window on secondly data.

    Despite the function name, the window size used here is '5S' (five
    seconds); the name is kept so existing test ids stay stable.

    Builds 100 rows one second apart (values 0..99) for the single entity
    'A1', runs SimpleSummaryStatistics on them and verifies that the first 5
    rows are missing from the result and that the first and last remaining
    rows carry the expected aggregates.
    """
    features = ['A']
    aggs = ['min', 'max', 'mean']
    # NOTE(review): '5S' is a legacy pandas alias, kept here because it is part of the asserted
    # column names and is interpreted by SimpleSummaryStatistics - confirm before changing it.
    rollsize = '5S'
    num_rows = 100

    trans = SimpleSummaryStatistics(features=features, aggregation_methods=aggs, rolling_window_size=rollsize)
    trans._entity_type = mocker.Mock(_df_index_entity_id='id', _timestamp='timestamp')

    df = pd.DataFrame(data=[[i] for i in range(num_rows)], columns=features)
    df['id'] = 'A1'
    # '1s' replaces the 'S' alias that pandas deprecated in 2.2; the generated range is the same.
    df['timestamp'] = pd.date_range(start='2019-01-01 07:10:37', periods=len(df), freq='1s')
    df = df.set_index(['id', 'timestamp'])

    df_trans = trans.execute(df=df)

    print(log_df_info(df_trans, head=-1))

    # The input frame keeps all its rows; the output lacks the first 5 seconds (5 secondly rows).
    assert len(df) == num_rows
    assert len(df_trans) == (num_rows - 5)

    first_row = df_trans.iloc[0]
    last_row = df_trans.iloc[-1]

    # First remaining row: the initial rolling window has been cut off.
    assert df_trans.index[0][1] == pd.Timestamp('2019-01-01 07:10:42')
    assert first_row['A'] == 5
    # Expected min is 1, not 0: the row exactly one window length earlier is not part of the window.
    for agg, expected in (('min', 1), ('max', 5), ('mean', 3)):
        assert first_row['A__%s__%s' % (agg, rollsize)] == expected

    # Last row of the result.
    assert df_trans.index[-1][1] == pd.Timestamp('2019-01-01 07:12:16')
    assert last_row['A'] == 99
    for agg, expected in (('min', 95), ('max', 99), ('mean', 97)):
        assert last_row['A__%s__%s' % (agg, rollsize)] == expected


def test_invalid_summary_statistics(mocker):
    """Verify that unsupported rolling window sizes are rejected.

    Constructing SimpleSummaryStatistics with any of the window sizes
    '1m', '1M', '1MS', '1w' or '1W' must raise ValueError.
    """
    features = ['A']
    aggs = ['min', 'max', 'mean']
    rejected_window_sizes = ('1m', '1M', '1MS', '1w', '1W')

    # Checked in the original order; the first size that does not raise fails the test.
    for window_size in rejected_window_sizes:
        with pytest.raises(ValueError):
            SimpleSummaryStatistics(features=features, aggregation_methods=aggs, rolling_window_size=window_size)
