This is page 3 of 16. Use http://codebase.md/mljar/mljar-supervised?page={x} to view the full context.

# Directory Structure

```
├── .github
│   └── workflows
│       ├── run-tests.yml
│       ├── test-installation-with-conda.yml
│       └── test-installation-with-pip-on-windows.yml
├── .gitignore
├── CITATION
├── examples
│   ├── notebooks
│   │   ├── basic_run.ipynb
│   │   └── Titanic.ipynb
│   └── scripts
│       ├── binary_classifier_adult_fairness.py
│       ├── binary_classifier_ensemble.py
│       ├── binary_classifier_marketing.py
│       ├── binary_classifier_random.py
│       ├── binary_classifier_Titanic.py
│       ├── binary_classifier.py
│       ├── multi_class_classifier_digits.py
│       ├── multi_class_classifier_MNIST.py
│       ├── multi_class_classifier.py
│       ├── multi_class_drug_fairness.py
│       ├── regression_acs_fairness.py
│       ├── regression_crime_fairness.py
│       ├── regression_housing_fairness.py
│       ├── regression_law_school_fairness.py
│       ├── regression.py
│       └── tabular_mar_2021.py
├── LICENSE
├── MANIFEST.in
├── pytest.ini
├── README.md
├── requirements_dev.txt
├── requirements.txt
├── setup.py
├── supervised
│   ├── __init__.py
│   ├── algorithms
│   │   ├── __init__.py
│   │   ├── algorithm.py
│   │   ├── baseline.py
│   │   ├── catboost.py
│   │   ├── decision_tree.py
│   │   ├── extra_trees.py
│   │   ├── factory.py
│   │   ├── knn.py
│   │   ├── lightgbm.py
│   │   ├── linear.py
│   │   ├── nn.py
│   │   ├── random_forest.py
│   │   ├── registry.py
│   │   ├── sklearn.py
│   │   └── xgboost.py
│   ├── automl.py
│   ├── base_automl.py
│   ├── callbacks
│   │   ├── __init__.py
│   │   ├── callback_list.py
│   │   ├── callback.py
│   │   ├── early_stopping.py
│   │   ├── learner_time_constraint.py
│   │   ├── max_iters_constraint.py
│   │   ├── metric_logger.py
│   │   ├── terminate_on_nan.py
│   │   └── total_time_constraint.py
│   ├── ensemble.py
│   ├── exceptions.py
│   ├── fairness
│   │   ├── __init__.py
│   │   ├── metrics.py
│   │   ├── optimization.py
│   │   ├── plots.py
│   │   ├── report.py
│   │   └── utils.py
│   ├── model_framework.py
│   ├── preprocessing
│   │   ├── __init__.py
│   │   ├── datetime_transformer.py
│   │   ├── encoding_selector.py
│   │   ├── exclude_missing_target.py
│   │   ├── goldenfeatures_transformer.py
│   │   ├── kmeans_transformer.py
│   │   ├── label_binarizer.py
│   │   ├── label_encoder.py
│   │   ├── preprocessing_categorical.py
│   │   ├── preprocessing_missing.py
│   │   ├── preprocessing_utils.py
│   │   ├── preprocessing.py
│   │   ├── scale.py
│   │   └── text_transformer.py
│   ├── tuner
│   │   ├── __init__.py
│   │   ├── data_info.py
│   │   ├── hill_climbing.py
│   │   ├── mljar_tuner.py
│   │   ├── optuna
│   │   │   ├── __init__.py
│   │   │   ├── catboost.py
│   │   │   ├── extra_trees.py
│   │   │   ├── knn.py
│   │   │   ├── lightgbm.py
│   │   │   ├── nn.py
│   │   │   ├── random_forest.py
│   │   │   ├── tuner.py
│   │   │   └── xgboost.py
│   │   ├── preprocessing_tuner.py
│   │   ├── random_parameters.py
│   │   └── time_controller.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── additional_metrics.py
│   │   ├── additional_plots.py
│   │   ├── automl_plots.py
│   │   ├── common.py
│   │   ├── config.py
│   │   ├── constants.py
│   │   ├── data_validation.py
│   │   ├── importance.py
│   │   ├── jsonencoder.py
│   │   ├── leaderboard_plots.py
│   │   ├── learning_curves.py
│   │   ├── metric.py
│   │   ├── shap.py
│   │   ├── subsample.py
│   │   └── utils.py
│   └── validation
│       ├── __init__.py
│       ├── validation_step.py
│       ├── validator_base.py
│       ├── validator_custom.py
│       ├── validator_kfold.py
│       └── validator_split.py
└── tests
    ├── __init__.py
    ├── checks
    │   ├── __init__.py
    │   ├── check_automl_with_regression.py
    │   ├── run_ml_tests.py
    │   └── run_performance_tests.py
    ├── conftest.py
    ├── data
    │   ├── 179.csv
    │   ├── 24.csv
    │   ├── 3.csv
    │   ├── 31.csv
    │   ├── 38.csv
    │   ├── 44.csv
    │   ├── 720.csv
    │   ├── 737.csv
    │   ├── acs_income_1k.csv
    │   ├── adult_missing_values_missing_target_500rows.csv
    │   ├── boston_housing.csv
    │   ├── CrimeData
    │   │   ├── cities.json
    │   │   ├── crimedata.csv
    │   │   └── README.md
    │   ├── Drug
    │   │   ├── Drug_Consumption.csv
    │   │   └── README.md
    │   ├── housing_regression_missing_values_missing_target.csv
    │   ├── iris_classes_missing_values_missing_target.csv
    │   ├── iris_missing_values_missing_target.csv
    │   ├── LawSchool
    │   │   ├── bar_pass_prediction.csv
    │   │   └── README.md
    │   ├── PortugeseBankMarketing
    │   │   └── Data_FinalProject.csv
    │   └── Titanic
    │       ├── test_with_Survived.csv
    │       └── train.csv
    ├── README.md
    ├── tests_algorithms
    │   ├── __init__.py
    │   ├── test_baseline.py
    │   ├── test_catboost.py
    │   ├── test_decision_tree.py
    │   ├── test_extra_trees.py
    │   ├── test_factory.py
    │   ├── test_knn.py
    │   ├── test_lightgbm.py
    │   ├── test_linear.py
    │   ├── test_nn.py
    │   ├── test_random_forest.py
    │   ├── test_registry.py
    │   └── test_xgboost.py
    ├── tests_automl
    │   ├── __init__.py
    │   ├── test_adjust_validation.py
    │   ├── test_automl_init.py
    │   ├── test_automl_report.py
    │   ├── test_automl_sample_weight.py
    │   ├── test_automl_time_constraints.py
    │   ├── test_automl.py
    │   ├── test_data_types.py
    │   ├── test_dir_change.py
    │   ├── test_explain_levels.py
    │   ├── test_golden_features.py
    │   ├── test_handle_imbalance.py
    │   ├── test_integration.py
    │   ├── test_joblib_version.py
    │   ├── test_models_needed_for_predict.py
    │   ├── test_prediction_after_load.py
    │   ├── test_repeated_validation.py
    │   ├── test_restore.py
    │   ├── test_stack_models_constraints.py
    │   ├── test_targets.py
    │   └── test_update_errors_report.py
    ├── tests_callbacks
    │   ├── __init__.py
    │   └── test_total_time_constraint.py
    ├── tests_ensemble
    │   ├── __init__.py
    │   └── test_save_load.py
    ├── tests_fairness
    │   ├── __init__.py
    │   ├── test_binary_classification.py
    │   ├── test_multi_class_classification.py
    │   └── test_regression.py
    ├── tests_preprocessing
    │   ├── __init__.py
    │   ├── disable_eda.py
    │   ├── test_categorical_integers.py
    │   ├── test_datetime_transformer.py
    │   ├── test_encoding_selector.py
    │   ├── test_exclude_missing.py
    │   ├── test_goldenfeatures_transformer.py
    │   ├── test_label_binarizer.py
    │   ├── test_label_encoder.py
    │   ├── test_preprocessing_missing.py
    │   ├── test_preprocessing_utils.py
    │   ├── test_preprocessing.py
    │   ├── test_scale.py
    │   └── test_text_transformer.py
    ├── tests_tuner
    │   ├── __init__.py
    │   ├── test_hill_climbing.py
    │   ├── test_time_controller.py
    │   └── test_tuner.py
    ├── tests_utils
    │   ├── __init__.py
    │   ├── test_compute_additional_metrics.py
    │   ├── test_importance.py
    │   ├── test_learning_curves.py
    │   ├── test_metric.py
    │   ├── test_shap.py
    │   └── test_subsample.py
    └── tests_validation
        ├── __init__.py
        ├── test_validator_kfold.py
        └── test_validator_split.py
```

# Files

--------------------------------------------------------------------------------
/supervised/utils/additional_plots.py:
--------------------------------------------------------------------------------

```python
import os

import numpy as np
import scikitplot as skplt
from matplotlib import pyplot as plt


class AdditionalPlots:
    @staticmethod
    def plots_binary(target, predicted_labels, predicted_probas):
        figures = []
        try:
            #
            fig = plt.figure(figsize=(10, 7))
            ax1 = fig.add_subplot(1, 1, 1)
            _ = skplt.metrics.plot_confusion_matrix(
                target, predicted_labels, normalize=False, ax=ax1
            )
            figures += [
                {
                    "title": "Confusion Matrix",
                    "fname": "confusion_matrix.png",
                    "figure": fig,
                }
            ]
            #
            fig = plt.figure(figsize=(10, 7))
            ax1 = fig.add_subplot(1, 1, 1)
            _ = skplt.metrics.plot_confusion_matrix(
                target, predicted_labels, normalize=True, ax=ax1
            )
            figures += [
                {
                    "title": "Normalized Confusion Matrix",
                    "fname": "confusion_matrix_normalized.png",
                    "figure": fig,
                }
            ]
            #
            fig = plt.figure(figsize=(10, 7))
            ax1 = fig.add_subplot(1, 1, 1)
            _ = skplt.metrics.plot_roc(target, predicted_probas, ax=ax1)
            figures += [{"title": "ROC Curve", "fname": "roc_curve.png", "figure": fig}]
            #
            fig = plt.figure(figsize=(10, 7))
            ax1 = fig.add_subplot(1, 1, 1)
            _ = skplt.metrics.plot_ks_statistic(target, predicted_probas, ax=ax1)
            figures += [
                {
                    "title": "Kolmogorov-Smirnov Statistic",
                    "fname": "ks_statistic.png",
                    "figure": fig,
                }
            ]
            #
            fig = plt.figure(figsize=(10, 7))
            ax1 = fig.add_subplot(1, 1, 1)
            _ = skplt.metrics.plot_precision_recall(target, predicted_probas, ax=ax1)
            figures += [
                {
                    "title": "Precision-Recall Curve",
                    "fname": "precision_recall_curve.png",
                    "figure": fig,
                }
            ]
            #
            fig = plt.figure(figsize=(10, 7))
            ax1 = fig.add_subplot(1, 1, 1)
            # transform target if needed to be {0, 1}
            target_uniq_values = np.unique(target)
            target_transformed = target.values.ravel()
            if not (0 in target_uniq_values and 1 in target_uniq_values):
                mapping = {target_uniq_values[0]: 0, target_uniq_values[1]: 1}
                target_transformed = target.map(mapping)
            # create a plot
            _ = skplt.metrics.plot_calibration_curve(
                target_transformed, [predicted_probas], ["Classifier"], ax=ax1
            )
            figures += [
                {
                    "title": "Calibration Curve",
                    "fname": "calibration_curve_curve.png",
                    "figure": fig,
                }
            ]
            #
            fig = plt.figure(figsize=(10, 7))
            ax1 = fig.add_subplot(1, 1, 1)
            _ = skplt.metrics.plot_cumulative_gain(target, predicted_probas, ax=ax1)
            figures += [
                {
                    "title": "Cumulative Gains Curve",
                    "fname": "cumulative_gains_curve.png",
                    "figure": fig,
                }
            ]
            #
            fig = plt.figure(figsize=(10, 7))
            ax1 = fig.add_subplot(1, 1, 1)
            _ = skplt.metrics.plot_lift_curve(target, predicted_probas, ax=ax1)
            figures += [
                {"title": "Lift Curve", "fname": "lift_curve.png", "figure": fig}
            ]

        except Exception as e:
            print(str(e))

        return figures

    @staticmethod
    def plots_multiclass(target, predicted_labels, predicted_probas):
        figures = []
        try:
            #
            fig = plt.figure(figsize=(10, 7))
            ax1 = fig.add_subplot(1, 1, 1)
            _ = skplt.metrics.plot_confusion_matrix(
                target, predicted_labels, normalize=False, ax=ax1
            )
            figures += [
                {
                    "title": "Confusion Matrix",
                    "fname": "confusion_matrix.png",
                    "figure": fig,
                }
            ]
            #
            fig = plt.figure(figsize=(10, 7))
            ax1 = fig.add_subplot(1, 1, 1)
            _ = skplt.metrics.plot_confusion_matrix(
                target, predicted_labels, normalize=True, ax=ax1
            )
            figures += [
                {
                    "title": "Normalized Confusion Matrix",
                    "fname": "confusion_matrix_normalized.png",
                    "figure": fig,
                }
            ]
            #
            fig = plt.figure(figsize=(10, 7))
            ax1 = fig.add_subplot(1, 1, 1)
            _ = skplt.metrics.plot_roc(target, predicted_probas, ax=ax1)
            figures += [{"title": "ROC Curve", "fname": "roc_curve.png", "figure": fig}]
            #
            fig = plt.figure(figsize=(10, 7))
            ax1 = fig.add_subplot(1, 1, 1)
            _ = skplt.metrics.plot_precision_recall(target, predicted_probas, ax=ax1)
            figures += [
                {
                    "title": "Precision Recall Curve",
                    "fname": "precision_recall_curve.png",
                    "figure": fig,
                }
            ]
            plt.close("all")
        except Exception as e:
            print(str(e))

        return figures

    @staticmethod
    def plots_regression(target, predictions):
        figures = []
        try:
            MAX_SAMPLES = 5000
            fig = plt.figure(figsize=(10, 7))
            ax1 = fig.add_subplot(1, 1, 1)
            samples = target.shape[0]
            if samples > MAX_SAMPLES:
                samples = MAX_SAMPLES
            ax1.scatter(
                target[:samples], predictions[:samples], c="tab:blue", alpha=0.2
            )
            plt.xlabel("True values")
            plt.ylabel("Predicted values")
            plt.title(f"Target values vs Predicted values (samples={samples})")
            plt.tight_layout(pad=5.0)
            figures += [
                {
                    "title": "True vs Predicted",
                    "fname": "true_vs_predicted.png",
                    "figure": fig,
                }
            ]

            # residual plot
            fig = plt.figure(figsize=(10, 7))
            ax1 = fig.add_subplot(1, 1, 1)
            residuals = target[:samples].values - predictions[:samples].values
            ax1.scatter(predictions[:samples], residuals, c="tab:blue", alpha=0.2)
            plt.xlabel("Predicted values")
            plt.ylabel("Residuals")
            plt.title(f"Predicted values vs Residuals (samples={samples})")
            plt.tight_layout(pad=5.0)
            bb = ax1.get_position()

            ax2 = fig.add_axes((bb.x0 + bb.size[0], bb.y0, 0.05, bb.size[1]))
            ax2.set_xticklabels([])
            ax2.set_yticklabels([])
            ax2.hist(residuals, 50, orientation="horizontal", alpha=0.5)
            ax2.axis("off")

            figures += [
                {
                    "title": "Predicted vs Residuals",
                    "fname": "predicted_vs_residuals.png",
                    "figure": fig,
                }
            ]
            plt.close("all")

        except Exception as e:
            print(str(e))
        return figures

    @staticmethod
    def append(fout, model_path, plots):
        try:
            for plot in plots:
                fname = plot.get("fname")
                fig = plot.get("figure")
                title = plot.get("title", "")
                fig.savefig(os.path.join(model_path, fname))
                fout.write(f"\n## {title}\n\n")
                fout.write(f"![{title}]({fname})\n\n")
        except Exception as e:
            print(str(e))

```
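
For orientation, a minimal usage sketch of the helpers above, outside of the AutoML pipeline. The synthetic dataset, the `LogisticRegression` model and the `plots_example` output directory are illustrative assumptions, not part of the library's documented API; the call pattern simply mirrors how `plots_binary` and `append` are invoked internally.

```python
import os

import pandas as pd
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

from supervised.utils.additional_plots import AdditionalPlots

# toy binary-classification data and model (assumption, for illustration only)
X, y = make_classification(n_samples=200, n_features=5, random_state=0)
model = LogisticRegression().fit(X, y)

target = pd.Series(y)                      # true labels as a pandas Series
predicted_labels = model.predict(X)        # hard class predictions
predicted_probas = model.predict_proba(X)  # class probabilities, shape (n_samples, 2)

figures = AdditionalPlots.plots_binary(target, predicted_labels, predicted_probas)

out_dir = "plots_example"                  # hypothetical output directory
os.makedirs(out_dir, exist_ok=True)
with open(os.path.join(out_dir, "README.md"), "w") as fout:
    # saves each figure as a PNG in out_dir and appends a markdown section per plot
    AdditionalPlots.append(fout, out_dir, figures)
```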

--------------------------------------------------------------------------------
/tests/tests_preprocessing/test_label_binarizer.py:
--------------------------------------------------------------------------------

```python
import json
import unittest

import numpy as np
import pandas as pd

from supervised.preprocessing.label_binarizer import LabelBinarizer


class LabelBinarizerTest(unittest.TestCase):
    def test_fit(self):
        # training data
        d = {"col1": ["a", "a", "c"], "col2": ["w", "e", "d"]}
        df = pd.DataFrame(data=d)
        lb = LabelBinarizer()
        # check first column
        lb.fit(df, "col1")
        data_json = lb.to_json()
        self.assertTrue("new_columns" in data_json)
        # we take alphabetical order
        self.assertTrue("col1_c" in data_json["new_columns"])
        self.assertTrue("col1_a" not in data_json["new_columns"])
        self.assertTrue("unique_values" in data_json)
        self.assertTrue("a" in data_json["unique_values"])
        self.assertTrue("c" in data_json["unique_values"])

        lb = LabelBinarizer()
        # check second column
        lb.fit(df, "col2")
        data_json = lb.to_json()
        self.assertTrue("new_columns" in data_json)
        self.assertTrue("col2_w" in data_json["new_columns"])
        self.assertTrue("col2_e" in data_json["new_columns"])
        self.assertTrue("col2_d" in data_json["new_columns"])
        self.assertTrue("unique_values" in data_json)
        self.assertTrue("w" in data_json["unique_values"])
        self.assertTrue("e" in data_json["unique_values"])
        self.assertTrue("d" in data_json["unique_values"])

    def test_transform(self):
        # training data
        d = {"col1": ["a", "a", "c"], "col2": ["w", "e", "d"]}
        df = pd.DataFrame(data=d)
        # fit binarizer
        lb1 = LabelBinarizer()
        lb1.fit(df, "col1")
        lb2 = LabelBinarizer()
        lb2.fit(df, "col2")
        # test data
        d_test = {"col1": ["c", "c", "a"], "col2": ["e", "d", "w"], "col3": [2, 3, 4]}
        df_test = pd.DataFrame(data=d_test)
        # transform
        df_test = lb1.transform(df_test, "col1")
        df_test = lb2.transform(df_test, "col2")
        # for binary column, only one value is left, old column should be deleted
        self.assertTrue("col1_c" in df_test.columns)
        self.assertTrue("col1" not in df_test.columns)
        self.assertEqual(2, np.sum(df_test["col1_c"]))
        # for multiple value column, all columns should be added
        self.assertTrue("col2_w" in df_test.columns)
        self.assertTrue("col2_e" in df_test.columns)
        self.assertTrue("col2_d" in df_test.columns)
        self.assertTrue("col2" not in df_test.columns)
        self.assertEqual(1, np.sum(df_test["col2_w"]))
        self.assertEqual(1, np.sum(df_test["col2_e"]))
        self.assertEqual(1, np.sum(df_test["col2_d"]))
        # do not touch continuous attribute
        self.assertTrue("col3" in df_test.columns)

    def test_transform_with_new_values(self):
        # training data
        d = {"col1": ["a", "a", "c"], "col2": ["w", "e", "d"]}
        df = pd.DataFrame(data=d)
        # fit binarizer
        lb1 = LabelBinarizer()
        lb1.fit(df, "col1")
        lb2 = LabelBinarizer()
        lb2.fit(df, "col2")
        # test data
        d_test = {"col1": ["c", "d", "d"], "col2": ["g", "e", "f"], "col3": [2, 3, 4]}
        df_test = pd.DataFrame(data=d_test)
        # transform
        df_test = lb1.transform(df_test, "col1")
        df_test = lb2.transform(df_test, "col2")
        self.assertTrue("col1_c" in df_test.columns)
        self.assertTrue("col1_d" not in df_test.columns)
        self.assertTrue("col2_w" in df_test.columns)
        self.assertTrue("col2_e" in df_test.columns)
        self.assertTrue("col2_d" in df_test.columns)
        self.assertTrue("col2_g" not in df_test.columns)
        self.assertTrue("col2_f" not in df_test.columns)
        self.assertEqual(df_test["col1_c"][0], 1)
        self.assertEqual(df_test["col1_c"][1], 0)
        self.assertEqual(df_test["col1_c"][2], 0)
        self.assertEqual(np.sum(df_test["col2_w"]), 0)
        self.assertEqual(np.sum(df_test["col2_d"]), 0)
        self.assertEqual(df_test["col2_e"][0], 0)
        self.assertEqual(df_test["col2_e"][1], 1)
        self.assertEqual(df_test["col2_e"][2], 0)

    def test_to_and_from_json(self):
        # training data
        d = {"col1": ["a", "a", "c"], "col2": ["w", "e", "d"]}
        df = pd.DataFrame(data=d)
        # fit binarizer
        lb1 = LabelBinarizer()
        lb1.fit(df, "col1")
        lb2 = LabelBinarizer()
        lb2.fit(df, "col2")
        # test data
        d_test = {"col1": ["c", "c", "a"], "col2": ["e", "d", "w"], "col3": [2, 3, 4]}
        df_test = pd.DataFrame(data=d_test)
        # to json and from json
        new_lb1 = LabelBinarizer()
        new_lb2 = LabelBinarizer()
        new_lb1.from_json(lb1.to_json())
        new_lb2.from_json(lb2.to_json())
        # transform
        df_test = new_lb1.transform(df_test, "col1")
        df_test = new_lb2.transform(df_test, "col2")
        # for binary column, only one value is left, old column should be deleted
        self.assertTrue("col1_c" in df_test.columns)
        self.assertTrue("col1" not in df_test.columns)
        self.assertEqual(2, np.sum(df_test["col1_c"]))
        # for multiple value column, all columns should be added
        self.assertTrue("col2_w" in df_test.columns)
        self.assertTrue("col2_e" in df_test.columns)
        self.assertTrue("col2_d" in df_test.columns)
        self.assertTrue("col2" not in df_test.columns)
        self.assertEqual(1, np.sum(df_test["col2_w"]))
        self.assertEqual(1, np.sum(df_test["col2_e"]))
        self.assertEqual(1, np.sum(df_test["col2_d"]))
        # do not touch continuous attribute
        self.assertTrue("col3" in df_test.columns)

    def test_to_and_from_json_booleans(self):
        # training data
        d = {"col1": ["a", "a", "c"], "col2": [True, True, False]}
        df = pd.DataFrame(data=d)
        # fit binarizer
        lb1 = LabelBinarizer()
        lb1.fit(df, "col1")
        lb2 = LabelBinarizer()
        lb2.fit(df, "col2")
        # test data
        d_test = {
            "col1": ["c", "c", "a"],
            "col2": [False, False, True],
            "col3": [2, 3, 4],
        }
        df_test = pd.DataFrame(data=d_test)
        # to json and from json
        new_lb1 = LabelBinarizer()
        new_lb2 = LabelBinarizer()
        new_lb1.from_json(lb1.to_json())
        new_lb2.from_json(json.loads(json.dumps(lb2.to_json(), indent=4)))

        # transform
        df_test = new_lb1.transform(df_test, "col1")
        df_test = new_lb2.transform(df_test, "col2")
        # for binary column, only one value is left, old column should be deleted
        self.assertTrue("col1_c" in df_test.columns)
        self.assertTrue("col1" not in df_test.columns)
        self.assertEqual(2, np.sum(df_test["col1_c"]))
        # for multiple value column, all columns should be added
        self.assertTrue("col2_True" in df_test.columns)
        self.assertTrue("col2" not in df_test.columns)
        self.assertEqual(1, np.sum(df_test["col2_True"]))
        # do not touch continuous attribute
        self.assertTrue("col3" in df_test.columns)

    def test_inverse_transform_2_unique_strings(self):
        d = {"col1": ["a", "a", "c"]}
        df = pd.DataFrame(data=d)
        lb = LabelBinarizer()
        lb.fit(df, "col1")
        bb = lb.transform(df, "col1")
        self.assertTrue("col1_c" in bb.columns)
        self.assertTrue(np.sum(bb["col1_c"]) == 1)
        bb = lb.inverse_transform(bb)
        self.assertTrue("col1_c" not in bb.columns)

    def test_inverse_transform_strings(self):
        d = {"col2": ["w", "e", "d"]}
        df = pd.DataFrame(data=d)
        lb = LabelBinarizer()
        lb.fit(df, "col2")
        bb = lb.transform(df, "col2")
        self.assertTrue("col2_w" in bb.columns)
        self.assertTrue("col2_e" in bb.columns)
        self.assertTrue("col2_d" in bb.columns)
        self.assertTrue(np.sum(bb["col2_w"]) == 1)
        bb = lb.inverse_transform(bb)
        self.assertTrue("col2_w" not in bb.columns)

    def test_inverse_transform_booleans(self):
        d = {"col1": [True, False, True, True]}
        df = pd.DataFrame(data=d)
        lb = LabelBinarizer()
        lb.fit(df, "col1")

        bb = lb.transform(df, "col1")
        self.assertTrue("col1_True" in bb.columns)
        self.assertEqual(bb["col1_True"].dtype, "int64")
        self.assertEqual(bb["col1_True"][0], 1)
        self.assertEqual(bb["col1_True"][1], 0)
        self.assertEqual(bb["col1_True"][2], 1)
        self.assertEqual(bb["col1_True"][3], 1)

        bb = lb.inverse_transform(bb)
        self.assertTrue("col1_True" not in bb.columns)
        self.assertEqual(bb["col1"].dtype, "bool")
        self.assertEqual(bb["col1"][0], True)
        self.assertEqual(bb["col1"][1], False)
        self.assertEqual(bb["col1"][2], True)
        self.assertEqual(bb["col1"][3], True)


if __name__ == "__main__":
    unittest.main()

```

--------------------------------------------------------------------------------
/supervised/tuner/time_controller.py:
--------------------------------------------------------------------------------

```python
import logging
import time

import numpy as np

from supervised.utils.config import LOG_LEVEL

logger = logging.getLogger(__name__)
logger.setLevel(LOG_LEVEL)


class TimeController:
    def __init__(
        self, start_time, total_time_limit, model_time_limit, steps, algorithms
    ):
        self._start_time = start_time
        self._total_time_limit = total_time_limit
        self._model_time_limit = model_time_limit
        self._steps = steps
        self._algorithms = algorithms
        self._spend = []
        self._is_hill_climbing = "hill_climbing_1" in steps
        self._is_stacking = "stack" in steps

    def to_json(self):
        return {
            "total_time_limit": self._total_time_limit,
            "model_time_limit": self._model_time_limit,
            "steps": self._steps,
            "algorithms": self._algorithms,
            "spend": self._spend,
            "is_hill_climbing": self._is_hill_climbing,
            "is_stacking": self._is_stacking,
        }

    @staticmethod
    def from_json(data):
        if data is None:
            return None
        try:
            total_time_limit = data.get("total_time_limit")
            model_time_limit = data.get("model_time_limit")
            steps = data.get("steps")
            algorithms = data.get("algorithms")

            tc = TimeController(
                time.time(), total_time_limit, model_time_limit, steps, algorithms
            )
            tc._spend = data.get("spend")
            tc._start_time -= tc.already_spend()  # shift start time back by the time already spent
            return tc
        except Exception as e:
            logger.error(f"Cant load TimeController from json, {str(e)}")
            pass
        return None

    def already_spend(self):
        return np.sum([s["train_time"] for s in self._spend])

    def time_should_use(self, fit_level):
        if self._total_time_limit is None:
            return 7 * 24 * 3600  # 7 days

        ratios = {
            "default_algorithms": 0.3,
            "not_so_random": 0.35,
            "mix_encoding": 0.05,
            "golden_features": 0.05,
            "kmeans_features": 0.05,
            "insert_random_feature": 0.05,
            "features_selection": 0.05,
            "hill_climbing_1": 0.2,  # enough to have only first step from hill climbing
            "boost_on_errors": 0.05,
            "stack": 0.2,
        }

        if (
            fit_level
            in [
                "default_algorithms",
                "not_so_random",
                "boost_on_errors",
                "mix_encoding",
                "golden_features",
                "kmeans_features",
                "insert_random_feature",
                "features_selection",
                "stack",
            ]
            or "hill_climbing" in fit_level
        ):
            ratio = 0
            for k, v in ratios.items():
                if k in self._steps:
                    ratio += v

            fl = fit_level
            if "hill_climbing" in fit_level:
                fl = "hill_climbing_1"

            ratio = ratios[fl] / ratio

            if "hill_climbing" in fit_level:
                # print("before hill climbing scale", ratio)
                hill_climbing_cnt = len(
                    [i for i in self._steps if "hill_climbing" in i]
                )
                ratio /= float(hill_climbing_cnt)

            should_use = self._total_time_limit * ratio

            return should_use

        return 0

    def compound_time_should_use(self, fit_level):
        compound = 0
        for step in self._steps:
            if step in [
                "adjust_validation",
                "simple_algorithms",
                # "default_algorithms",
                "ensemble",
                "ensemble_stacked",
            ]:
                continue
            time_should_use = self.time_should_use(step)
            compound += time_should_use

            if fit_level == step:
                break
        # if fit_level == "stack":
        #    compound -= 120 # leave time for ensemble
        # maybe not needed
        return compound

    def enough_time_for_step(self, fit_level):
        if fit_level in ["ensemble", "ensemble_stacked", "fairness"]:
            return True
        total_time_spend = time.time() - self._start_time
        compound = self.compound_time_should_use(fit_level)
        # print("Enough time for step", fit_level, np.round(total_time_spend,2), np.round(compound,2))
        if total_time_spend > compound:
            # don't train more
            return False

        return True

    def enough_time_for_model(self, model_type):
        if self._total_time_limit is None:
            return True

        time_left = self._total_time_limit - self.already_spend()
        spend = [s["train_time"] for s in self._spend if s["model_type"] == model_type]
        model_mean_spend = np.mean(spend)
        return model_mean_spend <= time_left

    def enough_time(self, model_type, step):
        """
        Check if there is enough time to train the next model.

        Parameters
        ----------
        model_type : str
            String with type of the model.

        step: str
            String with name of the step in the process of AutoML training.


        Returns
        -------
        bool
            `True` if there is time to train the next model, `False` otherwise.
        """
        if step in ["ensemble", "ensemble_stacked"]:
            return True
        # if model_time_limit is set, train every model
        # do not apply total_time_limit
        if self._model_time_limit is not None:
            return True
        # no total time limit, just train, don't ask
        if self._total_time_limit is None:
            return True

        total_time_spend = time.time() - self._start_time
        time_left = self._total_time_limit - total_time_spend
        # no time left, do not train any more models, sorry ...
        if time_left < 0:
            # print("No time left", time_left)
            return False

        # check the fit level type
        # we don't want to spend too much time on one step
        if not self.enough_time_for_step(step):
            # print("Not enough time for step", step)
            return False

        # there is still time and model_type was not tested yet
        # we should try it
        if time_left > 0 and self.model_spend(model_type) == 0:
            return True

        # stacked models converge faster
        # don't need to check ...
        if step == "stack":
            return True
        # check if there is enough time for model to train
        return self.enough_time_for_model(model_type)

    def learner_time_limit(self, model_type, fit_level, k_folds):
        if self._total_time_limit is None:
            return 7 * 24 * 3600

        if self._model_time_limit is not None:
            return self._model_time_limit / k_folds

        # just train them ...
        if fit_level == "simple_algorithms":
            return None
        if fit_level == "default_algorithms":
            return None

        tune_algorithms = [
            a
            for a in self._algorithms
            if a not in ["Baseline", "Linear", "Decision Tree", "Nearest Neighbors"]
        ]
        tune_algs_cnt = len(tune_algorithms)
        if tune_algs_cnt == 0:
            return None

        time_elapsed = time.time() - self._start_time
        time_left = self._total_time_limit - time_elapsed

        if fit_level == "not_so_random":
            tt = self.time_should_use(fit_level)

            tt /= tune_algs_cnt  # give time equally for each algorithm
            tt /= k_folds  # time is per learner (per fold)
            return tt

        if "hill_climbing" in fit_level:
            tt = self.time_should_use(fit_level)
            tt /= tune_algs_cnt  # give time equally for each algorithm
            tt /= k_folds  # time is per learner (per fold)
            return tt

        if self._is_stacking and fit_level == "stack":
            tt = time_left
            tt /= tune_algs_cnt  # give time equally for each algorithm
            tt /= k_folds  # time is per learner (per fold)
            return tt

    def log_time(self, model_name, model_type, fit_level, train_time):
        self._spend += [
            {
                "model_name": model_name,
                "model_type": model_type,
                "fit_level": fit_level,
                "train_time": train_time,
            }
        ]
        # print(pd.DataFrame(self._spend))
        # print("Already spend", self.already_spend())

    def step_spend(self, step):
        return np.sum([s["train_time"] for s in self._spend if s["fit_level"] == step])

    def model_spend(self, model_type):
        return np.sum(
            [s["train_time"] for s in self._spend if s["model_type"] == model_type]
        )

```
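
To make the time budgeting above concrete, here is a small sketch of how the step ratios translate into per-step budgets and how logged training times feed the `enough_time()` checks. The step list, algorithm list and the 600 s total limit are made-up values, used only for illustration.

```python
import time

from supervised.tuner.time_controller import TimeController

# hypothetical AutoML plan (assumption, chosen only to exercise the ratios above)
steps = [
    "simple_algorithms", "default_algorithms", "not_so_random",
    "golden_features", "insert_random_feature", "features_selection",
    "hill_climbing_1", "hill_climbing_2", "ensemble",
]
algorithms = ["Xgboost", "LightGBM", "Random Forest"]

tc = TimeController(
    start_time=time.time(),
    total_time_limit=600,    # 10 minutes for the whole run
    model_time_limit=None,   # no per-model limit, so the total limit governs
    steps=steps,
    algorithms=algorithms,
)

# ratios of the selected steps are re-normalized before each budget is computed
for step in ["default_algorithms", "not_so_random", "hill_climbing_1"]:
    print(step, round(tc.time_should_use(step), 1), "s")

# booked training time is what already_spend() and enough_time() look at
tc.log_time("1_Default_Xgboost", "Xgboost", "default_algorithms", train_time=12.3)
print("already spent:", tc.already_spend(), "s")
print("enough time for LightGBM?", tc.enough_time("LightGBM", "not_so_random"))
```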

--------------------------------------------------------------------------------
/supervised/callbacks/early_stopping.py:
--------------------------------------------------------------------------------

```python
import logging
import os

import numpy as np
import pandas as pd

from supervised.callbacks.callback import Callback
from supervised.utils.config import LOG_LEVEL
from supervised.utils.metric import Metric

logger = logging.getLogger(__name__)
logger.setLevel(LOG_LEVEL)


class EarlyStopping(Callback):
    def __init__(self, params):
        super(EarlyStopping, self).__init__(params)
        self.name = params.get("name", "early_stopping")
        self.metric = Metric(params.get("metric"))
        self.max_no_improvement_cnt = params.get("max_no_improvement_cnt", 5)
        self.log_to_dir = params.get("log_to_dir")

        self.keep_best_model = params.get("keep_best_model", True)
        self.best_iter = {}
        self.best_loss = {}
        self.loss_values = {}
        self.best_models = {}
        self.best_y_predicted = {}
        self.best_y_oof = (
            None  # predictions computed on out of folds or on validation set
        )
        self.final_loss = (
            None  # final score computed on combined predictions from all learners
        )
        # path to best model local copy, only used if cannot deep copy
        self.best_model_paths = {}
        self.multiple_target = False
        self.target_columns = None

    def add_and_set_learner(self, learner):
        self.learners += [learner]
        self.learner = learner
        self.best_iter[learner.uid] = None
        self.best_loss[learner.uid] = self.metric.worst_value()
        self.loss_values[learner.uid] = {"train": [], "validation": [], "iters": []}
        self.best_models[learner.uid] = None
        self.best_model_paths[learner.uid] = None
        self.best_y_predicted[learner.uid] = None

    def on_learner_train_start(self, logs):
        self.no_improvement_cnt = 0

    def on_framework_train_end(self, logs):
        # aggregate predictions from all learners
        # it has two columns: 'prediction', 'target'
        logger.debug("early stopping on framework train end")
        self.best_y_oof = pd.concat(list(self.best_y_predicted.values()))
        self.best_y_oof.sort_index(inplace=True)
        # check for duplicates in index -> repeats of validation
        if np.sum(self.best_y_oof.index.duplicated()):
            # we need to aggregate predictions from multiple repeats
            target_cols = [c for c in self.best_y_oof.columns if "prediction" not in c]
            prediction_cols = [c for c in self.best_y_oof.columns if "prediction" in c]

            aggs = {}
            for t in target_cols:
                aggs[t] = "first"
            for p in prediction_cols:
                aggs[p] = "mean"
            # aggregate predictions from repeats
            self.best_y_oof = self.best_y_oof.groupby(
                target_cols + prediction_cols, level=0
            ).agg(aggs)

        sample_weight = None
        if "sample_weight" in self.best_y_oof.columns:
            sample_weight = self.best_y_oof["sample_weight"]

        if "prediction" in self.best_y_oof:
            self.final_loss = self.metric(
                self.best_y_oof[self.target_columns],
                self.best_y_oof["prediction"],
                sample_weight=sample_weight,
            )
        else:
            prediction_cols = [c for c in self.best_y_oof.columns if "prediction" in c]
            self.final_loss = self.metric(
                self.best_y_oof[self.target_columns],
                self.best_y_oof[prediction_cols],
                sample_weight=sample_weight,
            )

    def on_iteration_end(self, logs, predictions):
        train_loss = 0
        if predictions.get("y_train_predicted") is not None:
            train_loss = self.metric(
                predictions.get("y_train_true"),
                predictions.get("y_train_predicted"),
                predictions.get("sample_weight"),
            )

        validation_loss = self.metric(
            predictions.get("y_validation_true"),
            predictions.get("y_validation_predicted"),
            predictions.get("sample_weight_validation"),
        )
        self.loss_values[self.learner.uid]["train"] += [train_loss]
        self.loss_values[self.learner.uid]["validation"] += [validation_loss]
        self.loss_values[self.learner.uid]["iters"] += [logs.get("iter_cnt")]

        if self.metric.improvement(
            previous=self.best_loss[self.learner.uid], current=validation_loss
        ):
            y_validation_true = predictions.get("y_validation_true")
            self.no_improvement_cnt = 0
            self.best_iter[self.learner.uid] = logs.get("iter_cnt")
            self.best_loss[self.learner.uid] = validation_loss

            if len(y_validation_true.shape) == 1 or y_validation_true.shape[1] == 1:
                self.best_y_predicted[self.learner.uid] = pd.DataFrame(
                    {
                        "target": np.array(y_validation_true)
                        # y_validation_true.values.reshape(
                        #    y_validation_true.shape[0]
                        # )
                    },
                    index=predictions.get("validation_index"),
                )
                self.multiple_target = False
                self.target_columns = "target"
            else:
                # in case of Neural Networks and multi-class classification with one-hot encoding
                self.best_y_predicted[self.learner.uid] = pd.DataFrame(
                    y_validation_true, index=predictions.get("validation_index")
                )
                self.multiple_target = True
                self.target_columns = y_validation_true.columns

            y_validation_predicted = predictions.get("y_validation_predicted")

            if len(y_validation_predicted.shape) == 1:
                # only one prediction column (binary classification or regression)
                col = predictions.get("validation_columns", "prediction")
                self.best_y_predicted[self.learner.uid][col] = np.array(
                    y_validation_predicted
                )
            else:
                # several columns in multiclass classification
                cols = predictions.get("validation_columns")
                for i_col in range(y_validation_predicted.shape[1]):
                    self.best_y_predicted[self.learner.uid][
                        # "prediction_{}".format(i_col)
                        cols[i_col]
                    ] = y_validation_predicted[:, i_col]

            # store sample_weight
            sample_weight_validation = predictions.get("sample_weight_validation")
            if sample_weight_validation is not None:
                self.best_y_predicted[self.learner.uid]["sample_weight"] = np.array(
                    sample_weight_validation
                )
            # store sensitive features
            sensitive_features_validation = predictions.get(
                "sensitive_features_validation"
            )

            if sensitive_features_validation is not None:
                for col in list(sensitive_features_validation.columns):
                    self.best_y_predicted[self.learner.uid][
                        f"sensitive_{col}"
                    ] = np.array(sensitive_features_validation[col])

            self.best_models[self.learner.uid] = self.learner.copy()
            # if local copy is not available, save model and keep path
            if self.best_models[self.learner.uid] is None:
                self.best_model_paths[self.learner.uid] = self.learner.save()
        else:
            self.no_improvement_cnt += 1

        if self.no_improvement_cnt > self.max_no_improvement_cnt:
            self.learner.stop_training = True

        logger.info(
            "EarlyStopping.on_iteration_end, train loss: {}, validation loss: {}, "
            "no improvement cnt {}, iters {}".format(
                train_loss,
                validation_loss,
                self.no_improvement_cnt,
                len(self.loss_values[self.learner.uid]["iters"]),
            )
        )

        if self.log_to_dir is not None and self.learner.algorithm_short_name not in [
            "Xgboost",
            "Random Forest",
            "Extra Trees",
            "LightGBM",
            "CatBoost",
            "Neural Network",
        ]:
            sign = -1.0 if Metric.optimize_negative(self.metric.name) else 1.0
            with open(
                os.path.join(self.log_to_dir, f"{self.learner.name}_training.log"), "a"
            ) as fout:
                iteration = len(self.loss_values[self.learner.uid]["iters"])
                fout.write(f"{iteration},{sign*train_loss},{sign*validation_loss}\n")

    def get_status(self):
        return "Train loss: {}, Validation loss: {} @ iteration {}".format(
            self.loss_values[self.learner.uid]["train"][-1],
            self.loss_values[self.learner.uid]["validation"][-1],
            len(self.loss_values[self.learner.uid]["iters"]),
        )

```
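
For reference, a minimal configuration sketch for this callback. The metric params follow the `{"name": ...}` convention used with `Metric` elsewhere in the package; the concrete values are illustrative assumptions, and in practice the model framework constructs and drives the callback itself.

```python
from supervised.callbacks.early_stopping import EarlyStopping

early_stop = EarlyStopping(
    {
        "metric": {"name": "logloss"},  # metric used to compare iterations
        "max_no_improvement_cnt": 5,    # patience: stop after 5 iterations without improvement
        "log_to_dir": None,             # set to a directory path to log loss values
    }
)

# The training framework then calls the hooks in this order:
#   add_and_set_learner(learner)   - register the learner (fold) being trained
#   on_iteration_end(logs, preds)  - track best iteration/loss, keep best predictions,
#                                    set learner.stop_training when patience runs out
#   on_framework_train_end(logs)   - assemble out-of-fold predictions (best_y_oof)
#                                    and compute final_loss on them
```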

--------------------------------------------------------------------------------
/supervised/algorithms/decision_tree.py:
--------------------------------------------------------------------------------

```python
import logging
import os
import warnings

import numpy as np
import sklearn
from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

from supervised.algorithms.registry import (
    BINARY_CLASSIFICATION,
    MULTICLASS_CLASSIFICATION,
    REGRESSION,
    AlgorithmsRegistry,
)
from supervised.algorithms.sklearn import SklearnAlgorithm
from supervised.utils.config import LOG_LEVEL

logger = logging.getLogger(__name__)
logger.setLevel(LOG_LEVEL)

import dtreeviz
from sklearn.tree import _tree

from supervised.utils.subsample import subsample


def get_rules(tree, feature_names, class_names):
    tree_ = tree.tree_
    feature_name = [
        feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!"
        for i in tree_.feature
    ]

    paths = []
    path = []

    def recurse(node, path, paths):
        if tree_.feature[node] != _tree.TREE_UNDEFINED:
            name = feature_name[node]
            threshold = tree_.threshold[node]
            p1, p2 = list(path), list(path)
            p1 += [f"({name} <= {np.round(threshold, 3)})"]
            recurse(tree_.children_left[node], p1, paths)
            p2 += [f"({name} > {np.round(threshold, 3)})"]
            recurse(tree_.children_right[node], p2, paths)
        else:
            path += [(tree_.value[node], tree_.n_node_samples[node])]
            paths += [path]

    recurse(0, path, paths)

    # sort by samples count
    samples_count = [p[-1][1] for p in paths]
    ii = list(np.argsort(samples_count))
    paths = [paths[i] for i in reversed(ii)]

    rules = []
    for path in paths:
        rule = "if "

        for p in path[:-1]:
            if rule != "if ":
                rule += " and "
            rule += str(p)
        rule += " then "
        if class_names is None:
            rule += "response: " + str(np.round(path[-1][0][0][0], 3))
        else:
            classes = path[-1][0][0]
            l = np.argmax(classes)
            rule += f"class: {class_names[l]} (proba: {np.round(100.0*classes[l]/np.sum(classes),2)}%)"
        rule += f" | based on {path[-1][1]:,} samples"
        rules += [rule]

    return rules


def save_rules(tree, feature_names, class_names, model_file_path, learner_name):
    try:
        rules = get_rules(tree, feature_names, class_names)
        fname = os.path.join(model_file_path, f"{learner_name}_rules.txt")
        with open(fname, "w") as fout:
            for r in rules:
                fout.write(r + "\n\n")
    except Exception as e:
        logger.info(f"Problem with extracting decision tree rules. {str(e)}")


class DecisionTreeAlgorithm(ClassifierMixin, SklearnAlgorithm):
    algorithm_name = "Decision Tree"
    algorithm_short_name = "Decision Tree"

    def __init__(self, params):
        super(DecisionTreeAlgorithm, self).__init__(params)
        logger.debug("DecisionTreeAlgorithm.__init__")
        self.library_version = sklearn.__version__
        self.max_iters = additional.get("max_steps", 1)
        self.model = DecisionTreeClassifier(
            criterion=params.get("criterion", "gini"),
            max_depth=params.get("max_depth", 3),
            random_state=params.get("seed", 1),
        )

    def file_extension(self):
        return "decision_tree"

    def interpret(
        self,
        X_train,
        y_train,
        X_validation,
        y_validation,
        model_file_path,
        learner_name,
        target_name=None,
        class_names=None,
        metric_name=None,
        ml_task=None,
        explain_level=2,
    ):
        super(DecisionTreeAlgorithm, self).interpret(
            X_train,
            y_train,
            X_validation,
            y_validation,
            model_file_path,
            learner_name,
            target_name,
            class_names,
            metric_name,
            ml_task,
            explain_level,
        )
        if explain_level == 0:
            return
        with warnings.catch_warnings():
            warnings.simplefilter(action="ignore")
            try:
                if len(class_names) > 10:
                    # dtreeviz does not support more than 10 classes
                    return

                viz = dtreeviz.model(
                    self.model,
                    X_train,
                    y_train,
                    target_name="target",
                    feature_names=X_train.columns,
                    class_names=class_names,
                )
                tree_file_plot = os.path.join(
                    model_file_path, learner_name + "_tree.svg"
                )
                viz.view().save(tree_file_plot)
            except Exception as e:
                logger.info(f"Problem when visualizing decision tree. {str(e)}")

            save_rules(
                self.model, X_train.columns, class_names, model_file_path, learner_name
            )


class DecisionTreeRegressorAlgorithm(RegressorMixin, SklearnAlgorithm):
    algorithm_name = "Decision Tree"
    algorithm_short_name = "Decision Tree"

    def __init__(self, params):
        super(DecisionTreeRegressorAlgorithm, self).__init__(params)
        logger.debug("DecisionTreeRegressorAlgorithm.__init__")
        self.library_version = sklearn.__version__
        self.max_iters = additional.get("max_steps", 1)
        self.model = DecisionTreeRegressor(
            criterion=params.get("criterion", "squared_error"),
            max_depth=params.get("max_depth", 3),
            random_state=params.get("seed", 1),
        )

    def file_extension(self):
        return "decision_tree"

    def interpret(
        self,
        X_train,
        y_train,
        X_validation,
        y_validation,
        model_file_path,
        learner_name,
        target_name=None,
        class_names=None,
        metric_name=None,
        ml_task=None,
        explain_level=2,
    ):
        super(DecisionTreeRegressorAlgorithm, self).interpret(
            X_train,
            y_train,
            X_validation,
            y_validation,
            model_file_path,
            learner_name,
            target_name,
            class_names,
            metric_name,
            ml_task,
            explain_level,
        )
        if explain_level == 0:
            return
        with warnings.catch_warnings():
            warnings.simplefilter(action="ignore")
            try:
                # 250 is a hard limit on the number of points used in the visualization
                # if too many points are used, the final SVG plot gets very large (can be > 100 MB)
                if X_train.shape[0] > 250:
                    x, _, y, _ = subsample(X_train, y_train, REGRESSION, 250)
                    viz = dtreeviz.model(
                        self.model,
                        x,
                        y,
                        target_name="target",
                        feature_names=x.columns,
                    )
                else:
                    viz = dtreeviz.model(
                        self.model,
                        X_train,
                        y_train,
                        target_name="target",
                        feature_names=X_train.columns,
                    )
                tree_file_plot = os.path.join(
                    model_file_path, learner_name + "_tree.svg"
                )
                viz.view().save(tree_file_plot)
            except Exception as e:
                logger.info(
                    f"Problem when visuzalizin decision tree regressor. {str(e)}"
                )

            save_rules(self.model, X_train.columns, None, model_file_path, learner_name)


dt_params = {"criterion": ["gini", "entropy"], "max_depth": [2, 3, 4]}

classification_default_params = {"criterion": "gini", "max_depth": 3}

additional = {
    "trees_in_step": 1,
    "train_cant_improve_limit": 0,
    "max_steps": 1,
    "max_rows_limit": None,
    "max_cols_limit": None,
}
required_preprocessing = [
    "missing_values_inputation",
    "convert_categorical",
    "datetime_transform",
    "text_transform",
    "target_as_integer",
]

AlgorithmsRegistry.add(
    BINARY_CLASSIFICATION,
    DecisionTreeAlgorithm,
    dt_params,
    required_preprocessing,
    additional,
    classification_default_params,
)

AlgorithmsRegistry.add(
    MULTICLASS_CLASSIFICATION,
    DecisionTreeAlgorithm,
    dt_params,
    required_preprocessing,
    additional,
    classification_default_params,
)

dt_regression_params = {
    "criterion": [
        "squared_error",
        "friedman_mse",
    ],  # remove "mae" because it slows down a lot https://github.com/scikit-learn/scikit-learn/issues/9626
    "max_depth": [2, 3, 4],
}
regression_required_preprocessing = [
    "missing_values_inputation",
    "convert_categorical",
    "datetime_transform",
    "text_transform",
]

regression_default_params = {"criterion": "squared_error", "max_depth": 3}

AlgorithmsRegistry.add(
    REGRESSION,
    DecisionTreeRegressorAlgorithm,
    dt_regression_params,
    regression_required_preprocessing,
    additional,
    regression_default_params,
)

```
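
As a standalone illustration of the rule extraction above, `get_rules` can be applied to a plain scikit-learn tree. The iris dataset choice and the sample output line in the comment are illustrative assumptions, not taken from the repository.

```python
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

from supervised.algorithms.decision_tree import get_rules

data = load_iris()
tree = DecisionTreeClassifier(max_depth=3, random_state=1).fit(data.data, data.target)

rules = get_rules(
    tree,
    feature_names=list(data.feature_names),
    class_names=list(data.target_names),  # pass None for regression trees
)
for rule in rules:
    print(rule)

# Rules are sorted by leaf sample count and look roughly like:
# if (petal width (cm) <= 0.8) then class: setosa (proba: 100.0%) | based on 50 samples
```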

--------------------------------------------------------------------------------
/tests/tests_automl/test_explain_levels.py:
--------------------------------------------------------------------------------

```python
import os
import shutil
import unittest

import pandas as pd
from sklearn import datasets

from supervised import AutoML
from supervised.algorithms.random_forest import additional

additional["max_steps"] = 3
additional["trees_in_step"] = 1

from supervised.algorithms.xgboost import additional

additional["max_rounds"] = 1


class AutoMLExplainLevelsTest(unittest.TestCase):
    automl_dir = "AutoMLExplainLevelsTest"

    def setUp(self):
        shutil.rmtree(self.automl_dir, ignore_errors=True)

    def tearDown(self):
        shutil.rmtree(self.automl_dir, ignore_errors=True)

    def run_explain_default(self, task, alg):
        shutil.rmtree(self.automl_dir, ignore_errors=True)
        a = AutoML(
            results_path=self.automl_dir,
            total_time_limit=10,
            algorithms=[alg],
            train_ensemble=False,
            validation_strategy={
                "validation_type": "kfold",
                "k_folds": 2,
                "shuffle": True,
                "stratify": True,
            },
            start_random_models=1,
        )

        if task == "binary":
            X, y = datasets.make_classification(
                n_samples=100,
                n_features=5,
                n_informative=4,
                n_redundant=1,
                n_classes=2,
                n_clusters_per_class=3,
                n_repeated=0,
                shuffle=False,
                random_state=0,
            )
        elif task == "multi":
            X, y = datasets.make_classification(
                n_samples=100,
                n_features=5,
                n_informative=4,
                n_redundant=1,
                n_classes=5,
                n_clusters_per_class=3,
                n_repeated=0,
                shuffle=False,
                random_state=0,
            )
        else:
            X, y = datasets.make_regression(
                n_samples=100,
                n_features=5,
                n_informative=4,
                shuffle=False,
                random_state=0,
            )

        X = pd.DataFrame(X, columns=[f"f_{i}" for i in range(X.shape[1])])

        a.fit(X, y)

        result_files = os.listdir(
            os.path.join(self.automl_dir, f'1_Default_{alg.replace(" ", "")}')
        )

        # There should be files with:
        # - permutation importance
        # - shap importance
        # - shap dependence
        # - shap decisions

        # Check permutation importance
        produced = False
        for f in result_files:
            if "importance.csv" in f and "shap" not in f:
                produced = True
                break
        self.assertTrue(produced)
        # Check shap importance
        produced = False
        for f in result_files:
            if "importance.csv" in f and "shap" in f:
                produced = True
                break
        self.assertTrue(produced)
        # Check shap dependence
        produced = False
        for f in result_files:
            if "shap_dependence" in f:
                produced = True
                break
        self.assertTrue(produced)
        # Check shap decisions
        produced = False
        for f in result_files:
            if "decisions.png" in f:
                produced = True
                break
        self.assertTrue(produced)

    # def test_explain_default(self):

    #     for task in ["binary", "multi", "regression"]:
    #         for alg in ["Xgboost", "Random Forest", "LightGBM"]:
    #             self.run_explain_default(task, alg)

    def test_no_explain_linear(self):
        a = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Linear"],
            train_ensemble=False,
            validation_strategy={
                "validation_type": "kfold",
                "k_folds": 2,
                "shuffle": True,
                "stratify": True,
            },
            explain_level=0,
            start_random_models=1,
        )

        X, y = datasets.make_regression(
            n_samples=100, n_features=5, n_informative=4, shuffle=False, random_state=0
        )
        X = pd.DataFrame(X, columns=[f"f_{i}" for i in range(X.shape[1])])

        a.fit(X, y)

        result_files = os.listdir(os.path.join(self.automl_dir, "1_Linear"))

        # There should be no files with:
        # - permutation importance
        # - shap importance
        # - shap dependence
        # - shap decisions

        # Check permutation importance
        produced = False
        for f in result_files:
            if "importance.csv" in f and "shap" not in f:
                produced = True
                break
        self.assertFalse(produced)
        # Check shap importance
        produced = False
        for f in result_files:
            if "importance.csv" in f and "shap" in f:
                produced = True
                break
        self.assertFalse(produced)
        # Check shap dependence
        produced = False
        for f in result_files:
            if "dependence.png" in f:
                produced = True
                break
        self.assertFalse(produced)
        # Check shap decisions
        produced = False
        for f in result_files:
            if "decisions.png" in f:
                produced = True
                break
        self.assertFalse(produced)
        # Check coefficients
        produced = False
        for f in result_files:
            if "coefs.csv" in f:
                produced = True
                break
        self.assertFalse(produced)

    def test_explain_just_permutation_importance(self):
        a = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
            validation_strategy={
                "validation_type": "kfold",
                "k_folds": 2,
                "shuffle": True,
                "stratify": True,
            },
            explain_level=1,
            start_random_models=1,
        )

        X, y = datasets.make_regression(
            n_samples=100, n_features=5, n_informative=4, shuffle=False, random_state=0
        )
        X = pd.DataFrame(X, columns=[f"f_{i}" for i in range(X.shape[1])])

        a.fit(X, y)

        result_files = os.listdir(os.path.join(self.automl_dir, "1_Default_Xgboost"))

        # With explain_level=1 there should be a file with:
        # - permutation importance
        # but no files with:
        # - shap importance
        # - shap dependence
        # - shap decisions

        # Check permutation importance
        produced = False
        for f in result_files:
            if "importance.csv" in f and "shap" not in f:
                produced = True
                break
        self.assertTrue(produced)
        # Check shap importance
        produced = False
        for f in result_files:
            if "importance.csv" in f and "shap" in f:
                produced = True
                break
        self.assertFalse(produced)
        # Check shap dependence
        produced = False
        for f in result_files:
            if "dependence.png" in f:
                produced = True
                break
        self.assertFalse(produced)
        # Check shap decisions
        produced = False
        for f in result_files:
            if "decisions.png" in f:
                produced = True
                break
        self.assertFalse(produced)

    def test_build_decision_tree(self):
        a = AutoML(
            results_path=self.automl_dir,
            total_time_limit=10,
            algorithms=["Decision Tree"],
            train_ensemble=False,
            validation_strategy={
                "validation_type": "kfold",
                "k_folds": 2,
                "shuffle": True,
                "stratify": True,
            },
            explain_level=2,
            start_random_models=1,
        )

        X, y = datasets.make_regression(
            n_samples=100, n_features=5, n_informative=4, shuffle=False, random_state=0
        )
        X = pd.DataFrame(X, columns=[f"f_{i}" for i in range(X.shape[1])])

        a.fit(X, y)

        result_files = os.listdir(os.path.join(self.automl_dir, "1_DecisionTree"))

        # There should be files with:
        # - decision tree visualization
        # - permutation importance
        # - shap importance
        # - shap dependence
        # - shap decisions

        # Check Decision Tree visualization
        produced = False
        for f in result_files:
            if "tree.svg" in f:
                produced = True
                break
        # tree visualization check disabled for now, TODO: re-enable
        # self.assertTrue(produced)

        # Check permutation importance
        produced = False
        for f in result_files:
            if "importance.csv" in f and "shap" not in f:
                produced = True
                break
        self.assertTrue(produced)
        # Check shap importance
        produced = False
        for f in result_files:
            if "importance.csv" in f and "shap" in f:
                produced = True
                break
        self.assertTrue(produced)
        # Check shap dependence
        produced = False
        for f in result_files:
            if "dependence.png" in f:
                produced = True
                break
        self.assertTrue(produced)
        # Check shap decisions
        produced = False
        for f in result_files:
            if "decisions.png" in f:
                produced = True
                break
        self.assertTrue(produced)

```
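
The tests above pin down which artifacts each `explain_level` produces. As a quick illustration (not part of the test suite; the `explain_demo` results path and the `1_Default_Xgboost` subdirectory name are assumptions mirroring the assertions above), a minimal run looks like this:

```python
import os

import pandas as pd
from sklearn import datasets

from supervised import AutoML

X, y = datasets.make_regression(n_samples=100, n_features=5, random_state=0)
X = pd.DataFrame(X, columns=[f"f_{i}" for i in range(X.shape[1])])

automl = AutoML(
    results_path="explain_demo",
    total_time_limit=10,
    algorithms=["Xgboost"],
    train_ensemble=False,
    explain_level=2,  # 0: no explanations, 1: permutation importance, 2: SHAP as well
    start_random_models=1,
)
automl.fit(X, y)

# inspect which explanation files were written for the first model
model_dir = os.path.join("explain_demo", "1_Default_Xgboost")
print(sorted(os.listdir(model_dir)))
```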

--------------------------------------------------------------------------------
/tests/tests_automl/test_targets.py:
--------------------------------------------------------------------------------

```python
import shutil
import unittest
import pytest

import numpy as np
import pandas as pd

from supervised import AutoML
from supervised.algorithms.xgboost import additional
from supervised.exceptions import AutoMLException

additional["max_rounds"] = 1


class AutoMLTargetsTest(unittest.TestCase):
    automl_dir = "automl_tests"
    rows = 50

    def tearDown(self):
        shutil.rmtree(self.automl_dir, ignore_errors=True)

    def test_bin_class_01(self):
        X = np.random.rand(self.rows, 3)
        X = pd.DataFrame(X, columns=[f"f{i}" for i in range(3)])
        y = np.random.randint(0, 2, self.rows)

        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
            explain_level=0,
            start_random_models=1,
        )
        automl.fit(X, y)
        pred = automl.predict(X)

        u = np.unique(pred)
        self.assertTrue(0 in u or 1 in u)
        self.assertTrue(len(u) <= 2)

    def test_bin_class_11(self):
        X = np.random.rand(self.rows, 3)
        X = pd.DataFrame(X, columns=[f"f{i}" for i in range(3)])
        y = np.random.randint(0, 2, self.rows) * 2 - 1

        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
            explain_level=0,
            start_random_models=1,
        )
        automl.fit(X, y)
        pred = automl.predict(X)

        u = np.unique(pred)

        self.assertTrue(-1 in u or 1 in u)
        self.assertTrue(0 not in u)
        self.assertTrue(len(u) <= 2)

    def test_bin_class_AB(self):
        X = np.random.rand(self.rows, 3)
        X = pd.DataFrame(X, columns=[f"f{i}" for i in range(3)])
        y = np.random.permutation(["a", "B"] * int(self.rows / 2))

        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
            explain_level=0,
            start_random_models=1,
        )
        automl.fit(X, y)
        pred = automl.predict(X)
        u = np.unique(pred)
        self.assertTrue("a" in u or "B" in u)
        self.assertTrue(len(u) <= 2)

    def test_bin_class_AB_missing_targets(self):
        X = np.random.rand(self.rows, 3)
        X = pd.DataFrame(X, columns=[f"f{i}" for i in range(3)])
        y = pd.Series(
            np.random.permutation(["a", "B"] * int(self.rows / 2)), name="target"
        )

        y.iloc[1] = None
        y.iloc[3] = np.NaN
        y.iloc[13] = np.nan

        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
            explain_level=0,
            start_random_models=1,
        )

        with pytest.warns(
            expected_warning=UserWarning,
            match="There are samples with missing target values in the data which will be excluded for further analysis",
        ) as record:
            automl.fit(X, y)

        # check that only one warning was raised
        self.assertEqual(len(record), 1)

        pred = automl.predict(X)

        u = np.unique(pred)
        self.assertTrue("a" in u or "B" in u)
        self.assertTrue(len(u) <= 2)

    def test_multi_class_0123_floats(self):
        X = np.random.rand(self.rows * 4, 3)
        X = pd.DataFrame(X, columns=[f"f{i}" for i in range(3)])
        y = np.random.randint(0, 4, self.rows * 4)
        y = y.astype(float)

        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
            explain_level=0,
            start_random_models=1,
        )
        automl.fit(X, y)
        pred = automl.predict(X)

        u = np.unique(pred)

        self.assertTrue(0.0 in u or 1.0 in u or 2.0 in u or 3.0 in u)
        self.assertTrue(len(u) <= 4)

    def test_multi_class_0123(self):
        X = np.random.rand(self.rows * 4, 3)
        X = pd.DataFrame(X, columns=[f"f{i}" for i in range(3)])
        y = np.random.randint(0, 4, self.rows * 4)

        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
            explain_level=0,
            start_random_models=1,
        )
        automl.fit(X, y)
        pred = automl.predict(X)

        u = np.unique(pred)

        self.assertTrue(0 in u or 1 in u or 2 in u or 3 in u)
        self.assertTrue(len(u) <= 4)

    def test_multi_class_0123_strings(self):
        X = np.random.rand(self.rows * 4, 3)
        X = pd.DataFrame(X, columns=[f"f{i}" for i in range(3)])
        y = np.random.randint(0, 4, self.rows * 4)
        y = y.astype(str)

        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
            explain_level=0,
            start_random_models=1,
        )
        automl.fit(X, y)
        pred = automl.predict(X)

        u = np.unique(pred)

        self.assertTrue("0" in u or "1" in u or "2" in u or "3" in u)
        self.assertTrue(len(u) <= 4)

    def test_multi_class_abcd(self):
        X = np.random.rand(self.rows * 4, 3)
        X = pd.DataFrame(X, columns=[f"f{i}" for i in range(3)])
        y = pd.Series(
            np.random.permutation(["a", "B", "CC", "d"] * self.rows), name="target"
        )

        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
            explain_level=0,
            start_random_models=1,
        )
        automl.fit(X, y)
        pred = automl.predict(X)

        u = np.unique(pred)

        self.assertTrue(np.intersect1d(u, ["a", "B", "CC", "d"]).shape[0] > 0)
        self.assertTrue(len(u) <= 4)

    def test_multi_class_abcd_np_array(self):
        X = np.random.rand(self.rows * 4, 3)
        X = pd.DataFrame(X, columns=[f"f{i}" for i in range(3)])
        y = np.random.permutation([None, "B", "CC", "d"] * self.rows)

        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
            explain_level=0,
            start_random_models=1,
        )
        automl.fit(X, y)
        pred = automl.predict(X)

        u = np.unique(pred)

        self.assertTrue(np.intersect1d(u, ["a", "B", "CC", "d"]).shape[0] > 0)
        self.assertTrue(len(u) <= 4)

    def test_multi_class_abcd_mixed_int(self):
        X = np.random.rand(self.rows * 4, 3)
        X = pd.DataFrame(X, columns=[f"f{i}" for i in range(3)])
        y = pd.Series(
            np.random.permutation([1, "B", "CC", "d"] * self.rows), name="target"
        )

        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
            explain_level=0,
            start_random_models=1,
        )
        automl.fit(X, y)
        pred = automl.predict(X)
        u = np.unique(pred)

        self.assertTrue(np.intersect1d(u, ["a", "B", "CC", "d"]).shape[0] > 0)
        self.assertTrue(len(u) <= 4)

    def test_multi_class_abcd_missing_target(self):
        X = np.random.rand(self.rows * 4, 3)
        X = pd.DataFrame(X, columns=[f"f{i}" for i in range(3)])
        y = pd.Series(
            np.random.permutation(["a", "B", "CC", "d"] * self.rows), name="target"
        )

        y.iloc[0] = None
        y.iloc[1] = None
        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
            explain_level=0,
            start_random_models=1,
        )

        with pytest.warns(
            expected_warning=UserWarning,
            match="There are samples with missing target values in the data which will be excluded for further analysis",
        ) as record:
            automl.fit(X, y)

        # check that only one warning was raised
        self.assertEqual(len(record), 1)

        pred = automl.predict(X)

        u = np.unique(pred)

        self.assertTrue(np.intersect1d(u, ["a", "B", "CC", "d"]).shape[0] > 0)
        self.assertTrue(len(u) <= 4)

    def test_regression(self):
        X = np.random.rand(self.rows, 3)
        X = pd.DataFrame(X, columns=[f"f{i}" for i in range(3)])
        y = np.random.rand(self.rows)

        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
            explain_level=0,
            start_random_models=1,
        )
        automl.fit(X, y)
        pred = automl.predict(X)

        self.assertIsInstance(pred, np.ndarray)
        self.assertEqual(len(pred), X.shape[0])

    def test_regression_missing_target(self):
        X = np.random.rand(self.rows, 3)
        X = pd.DataFrame(X, columns=[f"f{i}" for i in range(3)])
        y = pd.Series(np.random.rand(self.rows), name="target")

        y.iloc[1] = None

        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
            explain_level=0,
            start_random_models=1,
        )

        with pytest.warns(
            match="There are samples with missing target values in the data which will be excluded for further analysis"
        ) as record:
            automl.fit(X, y)

        self.assertEqual(len(record), 1)

        pred = automl.predict(X)

        self.assertIsInstance(pred, np.ndarray)
        self.assertEqual(len(pred), X.shape[0])

    def test_predict_on_empty_dataframe(self):
        X = np.random.rand(self.rows, 3)
        X = pd.DataFrame(X, columns=[f"f{i}" for i in range(3)])
        y = pd.Series(np.random.rand(self.rows), name="target")

        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
            explain_level=0,
            start_random_models=1,
        )
        automl.fit(X, y)

        with self.assertRaises(AutoMLException) as context:
            pred = automl.predict(pd.DataFrame())

        with self.assertRaises(AutoMLException) as context:
            pred = automl.predict(np.empty(shape=(0, 3)))

```
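
A minimal sketch of the missing-target behaviour exercised above: rows with a missing target are dropped with a single `UserWarning`, and `predict()` still returns one value per input row. The `targets_demo` results path is an assumption used only for this example.

```python
import warnings

import numpy as np
import pandas as pd

from supervised import AutoML

X = pd.DataFrame(np.random.rand(50, 3), columns=["f0", "f1", "f2"])
y = pd.Series(np.random.rand(50), name="target")
y.iloc[1] = None  # one missing target value

automl = AutoML(
    results_path="targets_demo",
    total_time_limit=1,
    algorithms=["Xgboost"],
    train_ensemble=False,
    explain_level=0,
    start_random_models=1,
)

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    automl.fit(X, y)

# the exclusion warning raised during fit()
print([str(w.message) for w in caught if issubclass(w.category, UserWarning)])
# predictions are still produced for every row, including the one with a missing target
print(len(automl.predict(X)))
```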

--------------------------------------------------------------------------------
/supervised/preprocessing/goldenfeatures_transformer.py:
--------------------------------------------------------------------------------

```python
import itertools
import json
import os
import time

import numpy as np
import pandas as pd
from joblib import Parallel, delayed
from sklearn.metrics import log_loss, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

from supervised.algorithms.registry import (
    BINARY_CLASSIFICATION,
    MULTICLASS_CLASSIFICATION,
    REGRESSION,
)
from supervised.exceptions import AutoMLException
from supervised.utils.jsonencoder import MLJSONEncoder


def get_binary_score(X_train, y_train, X_test, y_test):
    clf = DecisionTreeClassifier(max_depth=3)
    clf.fit(X_train, y_train)
    pred = clf.predict_proba(X_test)[:, 1]
    ll = log_loss(y_test, pred)
    return ll


def get_regression_score(X_train, y_train, X_test, y_test):
    clf = DecisionTreeRegressor(max_depth=3)
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    ll = mean_squared_error(y_test, pred)
    return ll


def get_multiclass_score(X_train, y_train, X_test, y_test):
    clf = DecisionTreeClassifier(max_depth=3)
    clf.fit(X_train, y_train)
    pred = clf.predict_proba(X_test)
    ll = log_loss(y_test, pred)
    return ll


def get_score(item):
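    # Build five candidate features from the column pair: the difference,
    # both ratios (with 0 substituted where the denominator is 0), the sum and
    # the product. Each candidate is scored with the task-specific shallow-tree
    # scorer above; a failed candidate yields None for its score.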
    col1 = item[0]
    col2 = item[1]
    X_train = item[2]
    y_train = item[3]
    X_test = item[4]
    y_test = item[5]
    scorer = item[6]

    try:
        x_train = np.array(X_train[col1] - X_train[col2]).reshape(-1, 1)
        x_test = np.array(X_test[col1] - X_test[col2]).reshape(-1, 1)
        diff_score = scorer(x_train, y_train, x_test, y_test)
    except Exception as e:
        diff_score = None
        print(str(e))

    try:
        a, b = (
            np.array(X_train[col1], dtype=float),
            np.array(X_train[col2], dtype=float),
        )
        x_train = np.divide(a, b, out=np.zeros_like(a), where=b != 0).reshape(-1, 1)
        a, b = np.array(X_test[col1], dtype=float), np.array(X_test[col2], dtype=float)
        x_test = np.divide(a, b, out=np.zeros_like(a), where=b != 0).reshape(-1, 1)
        ratio_1_score = scorer(x_train, y_train, x_test, y_test)
    except Exception as e:
        print(str(e))
        ratio_1_score = None

    try:
        b, a = (
            np.array(X_train[col1], dtype=float),
            np.array(X_train[col2], dtype=float),
        )
        x_train = np.divide(a, b, out=np.zeros_like(a), where=b != 0).reshape(-1, 1)
        b, a = np.array(X_test[col1], dtype=float), np.array(X_test[col2], dtype=float)
        x_test = np.divide(a, b, out=np.zeros_like(a), where=b != 0).reshape(-1, 1)
        ratio_2_score = scorer(x_train, y_train, x_test, y_test)
    except Exception as e:
        print(str(e))
        ratio_2_score = None

    try:
        x_train = np.array(X_train[col1] + X_train[col2]).reshape(-1, 1)
        x_test = np.array(X_test[col1] + X_test[col2]).reshape(-1, 1)
        sum_score = scorer(x_train, y_train, x_test, y_test)
    except Exception as e:
        sum_score = None
        print(str(e))

    try:
        x_train = np.array(X_train[col1] * X_train[col2]).reshape(-1, 1)
        x_test = np.array(X_test[col1] * X_test[col2]).reshape(-1, 1)
        multiply_score = scorer(x_train, y_train, x_test, y_test)
    except Exception as e:
        multiply_score = None
        print(str(e))

    return (diff_score, ratio_1_score, ratio_2_score, sum_score, multiply_score)


class GoldenFeaturesTransformer(object):
    def __init__(self, results_path=None, ml_task=None, features_count=None, n_jobs=-1):
        self._new_features = []
        self._new_columns = []
        self._ml_task = ml_task
        self._features_count = features_count
        self._n_jobs = n_jobs
        self._scorer = None
        if self._ml_task == BINARY_CLASSIFICATION:
            self._scorer = get_binary_score
        elif self._ml_task == MULTICLASS_CLASSIFICATION:
            self._scorer = get_multiclass_score
        else:
            self._scorer = get_regression_score

        self._error = None

        self._result_file = "golden_features.json"
        if results_path is not None:
            self._result_path = os.path.join(results_path, self._result_file)

            if os.path.exists(self._result_path):
                with open(self._result_path, "r") as file:
                    self.from_json(json.load(file), results_path)

    def fit(self, X, y):
        if self._new_features:
            return
        if self._error is not None and self._error:
            raise AutoMLException(
                "Golden Features not created due to error (please check errors.md). "
                + self._error
            )
        if X.shape[1] == 0:
            self._error = f"Golden Features not created. No continous features. Input data shape: {X.shape}, {y.shape}"
            self.save()
            raise AutoMLException("Golden Features not created. No continous features.")

        start_time = time.time()
        combinations = itertools.combinations(X.columns, r=2)
        items = [i for i in combinations]
        if len(items) > 250000:
            si = np.random.choice(len(items), 250000, replace=False)
            items = [items[i] for i in si]

        X_train, X_test, y_train, y_test = self._subsample(X, y)

        for i in range(len(items)):
            items[i] += (X_train, y_train, X_test, y_test, self._scorer)

        scores = []
        # parallel version
        scores = Parallel(n_jobs=self._n_jobs, backend="loky")(
            delayed(get_score)(i) for i in items
        )

        # single process version
        # for item in items:
        #    scores += [get_score(item)]

        if not scores:
            self._error = f"Golden Features not created. Empty scores. Input data shape: {X.shape}, {y.shape}"
            self.save()
            raise AutoMLException("Golden Features not created. Empty scores.")

        result = []
        for i in range(len(items)):
            if scores[i][0] is not None:
                result += [(items[i][0], items[i][1], "diff", scores[i][0])]
            if scores[i][1] is not None:
                result += [(items[i][0], items[i][1], "ratio", scores[i][1])]
            if scores[i][2] is not None:
                result += [(items[i][1], items[i][0], "ratio", scores[i][2])]
            if scores[i][3] is not None:
                result += [(items[i][1], items[i][0], "sum", scores[i][3])]
            if scores[i][4] is not None:
                result += [(items[i][1], items[i][0], "multiply", scores[i][4])]

        df = pd.DataFrame(
            result, columns=["feature1", "feature2", "operation", "score"]
        )
        df.sort_values(by="score", inplace=True)

        new_cols_cnt = np.min([100, np.max([10, int(0.1 * X.shape[1])])])

        if (
            self._features_count is not None
            and self._features_count > 0
            and self._features_count < df.shape[0]
        ):
            new_cols_cnt = self._features_count

        print(self._features_count, new_cols_cnt)
        self._new_features = json.loads(df.head(new_cols_cnt).to_json(orient="records"))

        for new_feature in self._new_features:
            new_col = "_".join(
                [
                    new_feature["feature1"],
                    new_feature["operation"],
                    new_feature["feature2"],
                ]
            )
            self._new_columns += [new_col]
            print(f"Add Golden Feature: {new_col}")

        self.save()

        print(
            f"Created {len(self._new_features)} Golden Features in {np.round(time.time() - start_time,2)} seconds."
        )

    def transform(self, X):
        for new_feature in self._new_features:
            new_col = "_".join(
                [
                    new_feature["feature1"],
                    new_feature["operation"],
                    new_feature["feature2"],
                ]
            )
            if new_feature["operation"] == "diff":
                X[new_col] = X[new_feature["feature1"]] - X[new_feature["feature2"]]
            elif new_feature["operation"] == "ratio":
                a, b = (
                    np.array(X[new_feature["feature1"]], dtype=float),
                    np.array(X[new_feature["feature2"]], dtype=float),
                )
                X[new_col] = np.divide(
                    a, b, out=np.zeros_like(a), where=b != 0
                ).reshape(-1, 1)
            elif new_feature["operation"] == "sum":
                X[new_col] = X[new_feature["feature1"]] + X[new_feature["feature2"]]
            elif new_feature["operation"] == "multiply":
                X[new_col] = X[new_feature["feature1"]] * X[new_feature["feature2"]]

        return X

    def to_json(self):
        data_json = {
            "new_features": self._new_features,
            "new_columns": self._new_columns,
            "ml_task": self._ml_task,
        }
        if self._error is not None and self._error:
            data_json["error"] = self._error
        return data_json

    def from_json(self, data_json, results_path):
        self._new_features = data_json.get("new_features", [])
        self._new_columns = data_json.get("new_columns", [])
        self._ml_task = data_json.get("ml_task")
        self._error = data_json.get("error")
        self._result_path = os.path.join(results_path, self._result_file)

    def save(self):
        with open(self._result_path, "w") as fout:
            fout.write(json.dumps(self.to_json(), indent=4, cls=MLJSONEncoder))

    def _subsample(self, X, y):
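        # Down-sample before the pairwise search: with more than 10k rows keep a
        # 10k subset first, then use 2.5k rows to fit the shallow trees and the
        # rest for scoring; smaller datasets use a 25% / 75% train/test split.
        # Classification tasks keep the splits stratified by the target.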
        MAX_SIZE = 10000
        TRAIN_SIZE = 2500

        shuffle = True
        stratify = None

        if X.shape[0] > MAX_SIZE:
            if self._ml_task != REGRESSION:
                stratify = y
            X_train, _, y_train, _ = train_test_split(
                X,
                y,
                train_size=MAX_SIZE,
                shuffle=shuffle,
                stratify=stratify,
                random_state=1,
            )
            if self._ml_task != REGRESSION:
                stratify = y_train

            X_train, X_test, y_train, y_test = train_test_split(
                X_train,
                y_train,
                train_size=TRAIN_SIZE,
                shuffle=shuffle,
                stratify=stratify,
                random_state=1,
            )
        else:
            if self._ml_task != REGRESSION:
                stratify = y
            train_size = X.shape[0] // 4
            X_train, X_test, y_train, y_test = train_test_split(
                X,
                y,
                train_size=train_size,
                shuffle=shuffle,
                stratify=stratify,
                random_state=1,
            )

        return X_train, X_test, y_train, y_test

```
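
For orientation, a minimal sketch of driving `GoldenFeaturesTransformer` directly (the `gf_demo` directory and the synthetic data are assumptions): `fit()` scores pairwise diff/ratio/sum/multiply candidates with shallow decision trees, keeps the best-scoring ones, persists them to `golden_features.json`, and `transform()` appends the new columns.

```python
import os

import numpy as np
import pandas as pd

from supervised.algorithms.registry import REGRESSION
from supervised.preprocessing.goldenfeatures_transformer import (
    GoldenFeaturesTransformer,
)

os.makedirs("gf_demo", exist_ok=True)  # results path must exist before save()

rng = np.random.default_rng(0)
X = pd.DataFrame(rng.normal(size=(300, 6)), columns=[f"f_{i}" for i in range(6)])
y = X["f_0"] * X["f_1"] + X["f_2"]  # target driven by feature interactions

gft = GoldenFeaturesTransformer(
    results_path="gf_demo", ml_task=REGRESSION, features_count=5
)
gft.fit(X, y)
X_new = gft.transform(X.copy())

# the appended columns, named "<feature1>_<operation>_<feature2>"
print([c for c in X_new.columns if c not in X.columns])
```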

--------------------------------------------------------------------------------
/supervised/tuner/optuna/tuner.py:
--------------------------------------------------------------------------------

```python
import json
import os
import warnings

import joblib
import matplotlib
import optuna
from matplotlib import pyplot as plt

from supervised.exceptions import AutoMLException
from supervised.preprocessing.preprocessing_utils import PreprocessingUtils
from supervised.tuner.optuna.catboost import CatBoostObjective
from supervised.tuner.optuna.extra_trees import ExtraTreesObjective
from supervised.tuner.optuna.knn import KNNObjective
from supervised.tuner.optuna.lightgbm import LightgbmObjective
from supervised.tuner.optuna.nn import NeuralNetworkObjective
from supervised.tuner.optuna.random_forest import RandomForestObjective
from supervised.tuner.optuna.xgboost import XgboostObjective
from supervised.utils.jsonencoder import MLJSONEncoder
from supervised.utils.metric import Metric


class OptunaTuner:
    def __init__(
        self,
        results_path,
        ml_task,
        eval_metric,
        time_budget=3600,
        init_params={},
        verbose=True,
        n_jobs=-1,
        random_state=42,
    ):
        if eval_metric.name not in [
            "auc",
            "logloss",
            "rmse",
            "mse",
            "mae",
            "mape",
            "r2",
            "spearman",
            "pearson",
            "f1",
            "average_precision",
            "accuracy",
            "user_defined_metric",
        ]:
            raise AutoMLException(f"Metric {eval_metric.name} is not supported")

        self.study_dir = os.path.join(results_path, "optuna")
        if not os.path.exists(self.study_dir):
            try:
                os.mkdir(self.study_dir)
            except Exception as e:
                print("Problem while creating directory for optuna studies.", str(e))
        self.tuning_fname = os.path.join(self.study_dir, "optuna.json")
        self.tuning = init_params
        self.eval_metric = eval_metric

        self.direction = (
            "maximize" if Metric.optimize_negative(eval_metric.name) else "minimize"
        )
        self.n_warmup_steps = (
            500  # set large enough to give small learning rates a chance
        )
        self.time_budget = time_budget
        self.verbose = verbose
        self.ml_task = ml_task
        self.n_jobs = n_jobs
        self.random_state = random_state
        self.cat_features_indices = []
        self.load()
        if not self.verbose:
            optuna.logging.set_verbosity(optuna.logging.CRITICAL)

    @staticmethod
    def is_optimizable(algorithm_name):
        return algorithm_name in [
            "Extra Trees",
            "Random Forest",
            "CatBoost",
            "Xgboost",
            "LightGBM",
            "Nearest Neighbors",
            "Neural Network",
        ]

    def optimize(
        self,
        algorithm,
        data_type,
        X_train,
        y_train,
        sample_weight,
        X_validation,
        y_validation,
        sample_weight_validation,
        learner_params,
    ):
        # only tune models with original data type
        if data_type != "original":
            return learner_params

        key = f"{data_type}_{algorithm}"
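        # results are cached per "{data_type}_{algorithm}" key in optuna.json,
        # so each algorithm is tuned once and its best params are reused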
        if key in self.tuning:
            return self.update_learner_params(learner_params, self.tuning[key])

        if self.verbose:
            print(
                f"Optuna optimizes {algorithm} with time budget {self.time_budget} seconds "
                f"eval_metric {self.eval_metric.name} ({self.direction})"
            )

        self.cat_features_indices = []
        for i in range(X_train.shape[1]):
            if PreprocessingUtils.is_categorical(X_train.iloc[:, i]):
                self.cat_features_indices += [i]

        study = optuna.create_study(
            direction=self.direction,
            sampler=optuna.samplers.TPESampler(seed=self.random_state),
            pruner=optuna.pruners.MedianPruner(n_warmup_steps=self.n_warmup_steps),
        )
        objective = None
        if algorithm == "LightGBM":
            objective = LightgbmObjective(
                self.ml_task,
                X_train,
                y_train,
                sample_weight,
                X_validation,
                y_validation,
                sample_weight_validation,
                self.eval_metric,
                self.cat_features_indices,
                self.n_jobs,
                self.random_state,
            )
        elif algorithm == "Xgboost":
            objective = XgboostObjective(
                self.ml_task,
                X_train,
                y_train,
                sample_weight,
                X_validation,
                y_validation,
                sample_weight_validation,
                self.eval_metric,
                self.n_jobs,
                self.random_state,
            )
        elif algorithm == "CatBoost":
            objective = CatBoostObjective(
                self.ml_task,
                X_train,
                y_train,
                sample_weight,
                X_validation,
                y_validation,
                sample_weight_validation,
                self.eval_metric,
                self.cat_features_indices,
                self.n_jobs,
                self.random_state,
            )
        elif algorithm == "Random Forest":
            objective = RandomForestObjective(
                self.ml_task,
                X_train,
                y_train,
                sample_weight,
                X_validation,
                y_validation,
                sample_weight_validation,
                self.eval_metric,
                self.n_jobs,
                self.random_state,
            )
        elif algorithm == "Extra Trees":
            objective = ExtraTreesObjective(
                self.ml_task,
                X_train,
                y_train,
                sample_weight,
                X_validation,
                y_validation,
                sample_weight_validation,
                self.eval_metric,
                self.n_jobs,
                self.random_state,
            )
        elif algorithm == "Nearest Neighbors":
            objective = KNNObjective(
                self.ml_task,
                X_train,
                y_train,
                sample_weight,
                X_validation,
                y_validation,
                sample_weight_validation,
                self.eval_metric,
                self.n_jobs,
                self.random_state,
            )
        elif algorithm == "Neural Network":
            objective = NeuralNetworkObjective(
                self.ml_task,
                X_train,
                y_train,
                sample_weight,
                X_validation,
                y_validation,
                sample_weight_validation,
                self.eval_metric,
                self.n_jobs,
                self.random_state,
            )

        study.optimize(
            objective, n_trials=5000, timeout=self.time_budget, gc_after_trial=True
        )

        self.plot_study(algorithm, data_type, study)

        joblib.dump(study, os.path.join(self.study_dir, key + ".joblib"))

        best = study.best_params

        if algorithm == "LightGBM":
            best["metric"] = objective.eval_metric_name
            best["custom_eval_metric_name"] = objective.custom_eval_metric_name
            best["num_boost_round"] = objective.rounds
            best["early_stopping_rounds"] = objective.early_stopping_rounds
            # best["learning_rate"] = objective.learning_rate
            best["cat_feature"] = self.cat_features_indices
            best["feature_pre_filter"] = False
            best["seed"] = objective.seed
        elif algorithm == "CatBoost":
            best["eval_metric"] = objective.eval_metric_name
            best["num_boost_round"] = objective.rounds
            best["early_stopping_rounds"] = objective.early_stopping_rounds
            # best["bootstrap_type"] = "Bernoulli"
            # best["learning_rate"] = objective.learning_rate
            best["seed"] = objective.seed
        elif algorithm == "Xgboost":
            best["objective"] = objective.objective
            best["eval_metric"] = objective.eval_metric_name
            # best["eta"] = objective.learning_rate
            best["max_rounds"] = objective.rounds
            best["early_stopping_rounds"] = objective.early_stopping_rounds
            best["seed"] = objective.seed
        elif algorithm == "Extra Trees":
            # Extra Trees are not using early stopping
            best["max_steps"] = objective.max_steps  # each step has 100 trees
            best["seed"] = objective.seed
            best["eval_metric_name"] = self.eval_metric.name
        elif algorithm == "Random Forest":
            # Random Forest is not using early stopping
            best["max_steps"] = objective.max_steps  # each step has 100 trees
            best["seed"] = objective.seed
            best["eval_metric_name"] = self.eval_metric.name
        elif algorithm == "Nearest Neighbors":
            best["rows_limit"] = 100000
        elif algorithm == "Neural Network":
            pass

        self.tuning[key] = best
        self.save()

        return self.update_learner_params(learner_params, best)

    def update_learner_params(self, learner_params, best):
        for k, v in best.items():
            learner_params[k] = v
        return learner_params

    def save(self):
        with open(self.tuning_fname, "w") as fout:
            fout.write(json.dumps(self.tuning, indent=4, cls=MLJSONEncoder))

    def load(self):
        if os.path.exists(self.tuning_fname):
            params = json.loads(open(self.tuning_fname).read())
            for k, v in params.items():
                self.tuning[k] = v

    def plot_study(self, algorithm, data_type, study):
        key = f"{data_type}_{algorithm}"

        plots = [
            (
                optuna.visualization.matplotlib.plot_optimization_history,
                "optimization_history",
            ),
            (
                optuna.visualization.matplotlib.plot_parallel_coordinate,
                "parallel_coordinate",
            ),
            (
                optuna.visualization.matplotlib.plot_param_importances,
                "param_importances",
            ),
            # (optuna.visualization.matplotlib.plot_slice, "slice"),
        ]

        matplotlib_default_figsize = matplotlib.rcParams["figure.figsize"]
        matplotlib.rcParams["figure.figsize"] = (11, 7)

        md = f"# Optuna tuning for {algorithm} on {data_type} data\n\n"
        for plot, title in plots:
            try:
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    plt.figure()
                    plt.rcParams["axes.grid"] = title != "parallel_coordinate"
                    plot(study)
                    plt.tight_layout(pad=2.0)
                    fname = f"{key}_{title}.png"
                    plt.savefig(os.path.join(self.study_dir, fname))
                    plt.close("all")

                    md += f'## {algorithm} {title.replace("_", " ").title()}\n\n'
                    md += f"![{algorithm} {data_type} {title}]({fname})\n\n"

            except Exception as e:
                print(str(e))

        matplotlib.rcParams["figure.figsize"] = matplotlib_default_figsize
        plt.style.use("default")

        with open(os.path.join(self.study_dir, "README.md"), "a") as fout:
            fout.write(md)
            fout.write("\n\n[<< Go back](../README.md)\n")

```
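
A standalone sketch of the study configuration `OptunaTuner` builds above: a TPE sampler with a fixed seed, a `MedianPruner` with a long warm-up, and a direction derived from the metric. The toy objective below is an assumption that stands in for the per-algorithm objectives such as `LightgbmObjective`.

```python
import optuna

direction = "minimize"  # e.g. for logloss/rmse; "maximize" for auc/r2

study = optuna.create_study(
    direction=direction,
    sampler=optuna.samplers.TPESampler(seed=42),
    pruner=optuna.pruners.MedianPruner(n_warmup_steps=500),
)


def toy_objective(trial):
    # stand-in for the per-algorithm objective classes used by OptunaTuner
    x = trial.suggest_float("x", -10.0, 10.0)
    return (x - 2.0) ** 2


study.optimize(toy_objective, n_trials=50, timeout=60, gc_after_trial=True)
print(study.best_params)
```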

--------------------------------------------------------------------------------
/supervised/algorithms/lightgbm.py:
--------------------------------------------------------------------------------

```python
import contextlib
import copy
import logging
import os

import lightgbm as lgb
import numpy as np
import pandas as pd
from sklearn.base import ClassifierMixin, RegressorMixin

from supervised.algorithms.algorithm import BaseAlgorithm
from supervised.algorithms.registry import (
    BINARY_CLASSIFICATION,
    MULTICLASS_CLASSIFICATION,
    REGRESSION,
    AlgorithmsRegistry,
)
from supervised.utils.config import LOG_LEVEL
from supervised.utils.metric import (
    lightgbm_eval_metric_accuracy,
    lightgbm_eval_metric_average_precision,
    lightgbm_eval_metric_f1,
    lightgbm_eval_metric_pearson,
    lightgbm_eval_metric_r2,
    lightgbm_eval_metric_spearman,
    lightgbm_eval_metric_user_defined,
)

logger = logging.getLogger(__name__)
logger.setLevel(LOG_LEVEL)


def lightgbm_objective(ml_task, automl_eval_metric):
    objective = "regression"
    if ml_task == BINARY_CLASSIFICATION:
        objective = "binary"
    elif ml_task == MULTICLASS_CLASSIFICATION:
        objective = "multiclass"
    else:  # ml_task == REGRESSION
        objective = "regression"
    return objective


def lightgbm_eval_metric(ml_task, automl_eval_metric):
    if automl_eval_metric == "user_defined_metric":
        return "custom", automl_eval_metric
    metric_name_mapping = {
        BINARY_CLASSIFICATION: {
            "auc": "auc",
            "logloss": "binary_logloss",
            "f1": "custom",
            "average_precision": "custom",
            "accuracy": "custom",
        },
        MULTICLASS_CLASSIFICATION: {
            "logloss": "multi_logloss",
            "f1": "custom",
            "accuracy": "custom",
        },
        REGRESSION: {
            "rmse": "rmse",
            "mse": "l2",
            "mae": "l1",
            "mape": "mape",
            "r2": "custom",
            "spearman": "custom",
            "pearson": "custom",
        },
    }

    metric = metric_name_mapping[ml_task][automl_eval_metric]
    custom_eval_metric = None

    if automl_eval_metric in [
        "r2",
        "spearman",
        "pearson",
        "f1",
        "average_precision",
        "accuracy",
    ]:
        custom_eval_metric = automl_eval_metric

    return metric, custom_eval_metric


class LightgbmAlgorithm(BaseAlgorithm):
    algorithm_name = "LightGBM"
    algorithm_short_name = "LightGBM"

    def __init__(self, params):
        super(LightgbmAlgorithm, self).__init__(params)
        self.library_version = lgb.__version__

        self.explain_level = params.get("explain_level", 0)
        self.rounds = additional.get("max_rounds", 10000)
        self.max_iters = 1
        self.early_stopping_rounds = additional.get("early_stopping_rounds", 50)

        n_jobs = self.params.get("n_jobs", 0)
        # 0 is the default for LightGBM to use all cores
        if n_jobs == -1:
            n_jobs = 0

        self.learner_params = {
            "boosting_type": "gbdt",
            "objective": self.params.get("objective", "binary"),
            "metric": self.params.get("metric", "binary_logloss"),
            "num_leaves": self.params.get("num_leaves", 31),
            "learning_rate": self.params.get("learning_rate", 0.1),
            "feature_fraction": self.params.get("feature_fraction", 1.0),
            "bagging_fraction": self.params.get("bagging_fraction", 1.0),
            "min_data_in_leaf": self.params.get("min_data_in_leaf", 20),
            "num_threads": n_jobs,
            "verbose": -1,
            "seed": self.params.get("seed", 1),
            "extra_trees": self.params.get("extra_trees", False),
        }

        for extra_param in [
            "lambda_l1",
            "lambda_l2",
            "bagging_freq",
            "feature_pre_filter",
            "cat_feature",
            "cat_l2",
            "cat_smooth",
            "max_bin",
        ]:
            if extra_param in self.params:
                self.learner_params[extra_param] = self.params[extra_param]

        if "num_boost_round" in self.params:
            self.rounds = self.params["num_boost_round"]
        if "early_stopping_rounds" in self.params:
            self.early_stopping_rounds = self.params["early_stopping_rounds"]

        if "num_class" in self.params:  # multiclass classification
            self.learner_params["num_class"] = self.params.get("num_class")

        self.custom_eval_metric = None
        if self.params.get("custom_eval_metric_name") is not None:
            if self.params["custom_eval_metric_name"] == "r2":
                self.custom_eval_metric = lightgbm_eval_metric_r2
            elif self.params["custom_eval_metric_name"] == "spearman":
                self.custom_eval_metric = lightgbm_eval_metric_spearman
            elif self.params["custom_eval_metric_name"] == "pearson":
                self.custom_eval_metric = lightgbm_eval_metric_pearson
            elif self.params["custom_eval_metric_name"] == "f1":
                self.custom_eval_metric = lightgbm_eval_metric_f1
            elif self.params["custom_eval_metric_name"] == "average_precision":
                self.custom_eval_metric = lightgbm_eval_metric_average_precision
            elif self.params["custom_eval_metric_name"] == "accuracy":
                self.custom_eval_metric = lightgbm_eval_metric_accuracy
            elif self.params["custom_eval_metric_name"] == "user_defined_metric":
                self.custom_eval_metric = lightgbm_eval_metric_user_defined

        logger.debug("LightgbmLearner __init__")

    def file_extension(self):
        return "lightgbm"

    def update(self, update_params):
        pass

    """
    def get_boosting_rounds(self, lgb_train, valid_sets, esr, max_time):
        if max_time is None:
            max_time = 3600.0
        start_time = time.time()
        evals_result = {}
        model = lgb.train(
            self.learner_params,
            lgb_train,
            num_boost_round=2,
            valid_sets=valid_sets,
            early_stopping_rounds=esr,
            evals_result=evals_result,
            verbose_eval=False,
        )
        time_1_iter = (time.time() - start_time) / 2.0

        # 2.0 is just a scaling factor
        # purely heuristic
        iters = int(max_time / time_1_iter * 2.0)
        iters = max(iters, 100)
        iters = min(iters, 10000)
        return iters
    """

    def fit(
        self,
        X,
        y,
        sample_weight=None,
        X_validation=None,
        y_validation=None,
        sample_weight_validation=None,
        log_to_file=None,
        max_time=None,
    ):
        lgb_train = lgb.Dataset(
            X.values if isinstance(X, pd.DataFrame) else X,
            y,
            weight=sample_weight,
        )
        valid_sets = None
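        # Two training paths: with early_stopping_rounds == 0 train for a fixed
        # number of rounds (continuing from self.model if it is already set);
        # otherwise train against a validation set with early stopping and
        # recorded evaluation history (optionally written to log_to_file as CSV).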
        if self.early_stopping_rounds == 0:
            self.model = lgb.train(
                self.learner_params,
                lgb_train,
                num_boost_round=self.rounds,
                init_model=self.model,
            )
        else:
            valid_names = None
            esr = None
            if X_validation is not None and y_validation is not None:
                valid_sets = [
                    lgb_train,
                    lgb.Dataset(
                        X_validation.values
                        if isinstance(X_validation, pd.DataFrame)
                        else X_validation,
                        y_validation,
                        weight=sample_weight_validation,
                    ),
                ]
                valid_names = ["train", "validation"]
                esr = self.early_stopping_rounds
            evals_result = {}

            # disable for now ...
            # boosting_rounds = self.get_boosting_rounds(lgb_train, valid_sets, esr, max_time)

            self.model = lgb.train(
                self.learner_params,
                lgb_train,
                num_boost_round=self.rounds,
                valid_sets=valid_sets,
                valid_names=valid_names,
                feval=self.custom_eval_metric,
                callbacks=[
                    lgb.early_stopping(esr, verbose=False),
                    lgb.record_evaluation(evals_result),
                ],
            )

            del lgb_train
            if valid_sets is not None:
                del valid_sets[0]
                del valid_sets

            if log_to_file is not None:
                metric_name = list(evals_result["train"].keys())[0]
                result = pd.DataFrame(
                    {
                        "iteration": range(len(evals_result["train"][metric_name])),
                        "train": evals_result["train"][metric_name],
                        "validation": evals_result["validation"][metric_name],
                    }
                )
                result.to_csv(log_to_file, index=False, header=False)

            if self.params["ml_task"] != REGRESSION:
                self.classes_ = np.unique(y)

    def is_fitted(self):
        return self.model is not None

    def predict(self, X):
        self.reload()
        return self.model.predict(X.values if isinstance(X, pd.DataFrame) else X)

    def copy(self):
        with open(os.devnull, "w") as f, contextlib.redirect_stdout(f):
            return copy.deepcopy(self)

    def save(self, model_file_path):
        self.model.save_model(model_file_path)
        self.model_file_path = model_file_path
        logger.debug("LightgbmAlgorithm save model to %s" % model_file_path)

    def load(self, model_file_path):
        logger.debug("LightgbmAlgorithm load model from %s" % model_file_path)
        self.model_file_path = model_file_path
        self.model = lgb.Booster(model_file=model_file_path)

    def get_metric_name(self):
        metric = self.params.get("metric")
        custom_metric = self.params.get("custom_eval_metric_name")

        if metric is None:
            return None
        if metric == "custom":
            return custom_metric
        if metric == "binary_logloss":
            return "logloss"
        elif metric == "multi_logloss":
            return "logloss"
        return metric


lgbm_bin_params = {
    "objective": ["binary"],
    "num_leaves": [15, 31, 63, 95, 127],
    "learning_rate": [0.05, 0.1, 0.2],
    "feature_fraction": [0.5, 0.8, 0.9, 1.0],
    "bagging_fraction": [0.5, 0.8, 0.9, 1.0],
    "min_data_in_leaf": [5, 10, 15, 20, 30, 50],
}

classification_bin_default_params = {
    "objective": "binary",
    "num_leaves": 63,
    "learning_rate": 0.05,
    "feature_fraction": 0.9,
    "bagging_fraction": 0.9,
    "min_data_in_leaf": 10,
}


additional = {
    "max_rounds": 10000,
    "early_stopping_rounds": 50,
    "max_rows_limit": None,
    "max_cols_limit": None,
}

required_preprocessing = [
    "missing_values_inputation",
    "convert_categorical",
    "datetime_transform",
    "text_transform",
    "target_as_integer",
]

lgbm_multi_params = copy.deepcopy(lgbm_bin_params)
lgbm_multi_params["objective"] = ["multiclass"]

classification_multi_default_params = {
    "objective": "multiclass",
    "num_leaves": 63,
    "learning_rate": 0.05,
    "feature_fraction": 0.9,
    "bagging_fraction": 0.9,
    "min_data_in_leaf": 10,
}

lgbr_params = copy.deepcopy(lgbm_bin_params)
lgbr_params["objective"] = ["regression"]


class LgbmClassifier(ClassifierMixin, LightgbmAlgorithm):
    pass


AlgorithmsRegistry.add(
    BINARY_CLASSIFICATION,
    LgbmClassifier,
    lgbm_bin_params,
    required_preprocessing,
    additional,
    classification_bin_default_params,
)

AlgorithmsRegistry.add(
    MULTICLASS_CLASSIFICATION,
    LgbmClassifier,
    lgbm_multi_params,
    required_preprocessing,
    additional,
    classification_multi_default_params,
)

regression_required_preprocessing = [
    "missing_values_inputation",
    "convert_categorical",
    "datetime_transform",
    "text_transform",
    "target_scale",
]


regression_default_params = {
    "objective": "regression",
    "num_leaves": 63,
    "learning_rate": 0.05,
    "feature_fraction": 0.9,
    "bagging_fraction": 0.9,
    "min_data_in_leaf": 10,
}


class LgbmRegressor(RegressorMixin, LightgbmAlgorithm):
    pass


AlgorithmsRegistry.add(
    REGRESSION,
    LgbmRegressor,
    lgbr_params,
    regression_required_preprocessing,
    additional,
    regression_default_params,
)

```
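
A standalone sketch of the training path used in `fit()` above: `lgb.train` with early stopping and evaluation recording handled through callbacks. The data sizes and parameter values are illustrative assumptions.

```python
import lightgbm as lgb
import numpy as np
from sklearn.model_selection import train_test_split

X = np.random.rand(500, 5)
y = 2.0 * X[:, 0] + 0.1 * np.random.rand(500)
X_tr, X_va, y_tr, y_va = train_test_split(X, y, random_state=1)

train_set = lgb.Dataset(X_tr, y_tr)
valid_set = lgb.Dataset(X_va, y_va)

evals_result = {}
model = lgb.train(
    {"objective": "regression", "metric": "rmse", "learning_rate": 0.1, "verbose": -1},
    train_set,
    num_boost_round=1000,
    valid_sets=[train_set, valid_set],
    valid_names=["train", "validation"],
    callbacks=[
        lgb.early_stopping(50, verbose=False),
        lgb.record_evaluation(evals_result),
    ],
)
print(model.best_iteration, evals_result["validation"]["rmse"][-1])
```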

--------------------------------------------------------------------------------
/supervised/utils/shap.py:
--------------------------------------------------------------------------------

```python
import logging
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
shap_package_available = False
try:
    # I'm tired of all shap dependency hell
    # ugh
    import shap
    shap_package_available = True
except Exception:
    pass

from sklearn.preprocessing import OneHotEncoder

from supervised.algorithms.registry import (
    BINARY_CLASSIFICATION,
    MULTICLASS_CLASSIFICATION,
    REGRESSION,
)

logger = logging.getLogger(__name__)
from supervised.utils.config import LOG_LEVEL

logger.setLevel(LOG_LEVEL)
import warnings


class PlotSHAP:
    @staticmethod
    def is_available(algorithm, X_train, y_train, ml_task):
        if not shap_package_available:
            return False
        # https://github.com/mljar/mljar-supervised/issues/112 disable for NN
        # https://github.com/mljar/mljar-supervised/issues/114 disable for CatBoost
        if algorithm.algorithm_short_name in ["Baseline", "Neural Network", "CatBoost"]:
            return False
        if (
            algorithm.algorithm_short_name == "Xgboost"
            and algorithm.learner_params["booster"] == "gblinear"
        ):
            # Xgboost gblinear is not supported by SHAP
            return False
        # disable for large number of columns
        if X_train.shape[1] > 500:
            warnings.warn(
                "Disable SHAP explanations because of number of columns > 500."
            )
            return False
        if ml_task == MULTICLASS_CLASSIFICATION and len(np.unique(y_train)) > 100:
            warnings.warn(
                "Disable SHAP explanations because of large number of classes (> 100)."
            )
            return False
        if X_train.shape[0] < 20:
            warnings.warn(
                "Disable SHAP explanations because of small number of samples (< 20)."
            )
            return False
        return True

    @staticmethod
    def get_explainer(algorithm, X_train):
        explainer = None
        if algorithm.algorithm_short_name in [
            "Xgboost",
            "Decision Tree",
            "Random Forest",
            "LightGBM",
            "Extra Trees",
            "CatBoost",
        ]:
            explainer = shap.TreeExplainer(algorithm.model)
        elif algorithm.algorithm_short_name in ["Linear"]:
            explainer = shap.LinearExplainer(algorithm.model, X_train)
        # elif algorithm.algorithm_short_name in ["Neural Network"]:
        #    explainer = shap.KernelExplainer(algorithm.model.predict, X_train)  # slow

        return explainer

    @staticmethod
    def get_sample(X_validation, y_validation):
        # too many samples in the data, down-sample it
        SAMPLES_LIMIT = 1000
        if X_validation.shape[0] > SAMPLES_LIMIT:
            X_validation.reset_index(inplace=True, drop=True)
            y_validation.reset_index(inplace=True, drop=True)
            X_vald = X_validation.sample(SAMPLES_LIMIT)
            y_vald = y_validation[X_vald.index]
        else:
            X_vald = X_validation
            y_vald = y_validation
        return X_vald, y_vald

    @staticmethod
    def get_predictions(algorithm, X_vald, y_vald, ml_task):
        # compute predictions on down-sampled data
        predictions = algorithm.predict(X_vald)

        if ml_task == MULTICLASS_CLASSIFICATION:
            oh = OneHotEncoder(sparse_output=False)
            y_encoded = oh.fit_transform(np.array(y_vald).reshape(-1, 1))
            residua = np.sum(np.abs(np.array(y_encoded) - predictions), axis=1)
        else:
            residua = np.abs(np.array(y_vald) - predictions)

        df_preds = pd.DataFrame(
            {"res": residua, "lp": range(residua.shape[0]), "target": np.array(y_vald)},
            index=X_vald.index,
        )
        df_preds = df_preds.sort_values(by="res", ascending=False)

        return df_preds

    @staticmethod
    def summary(shap_values, X_vald, model_file_path, learner_name, class_names):
        fig = plt.gcf()
        classes = None
        if class_names is not None and len(class_names):
            classes = class_names
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            shap.summary_plot(
                shap_values, X_vald, plot_type="bar", show=False, class_names=classes
            )
            fig.tight_layout(pad=2.0)
            fig.savefig(os.path.join(model_file_path, f"{learner_name}_shap_summary.png"))
            plt.close("all")

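        # aggregate mean |SHAP| per feature; for multi-class tasks shap_values is
        # a list with one array per class, so sum the per-class importances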
        vals = None
        if isinstance(shap_values, list):
            for sh in shap_values:
                v = np.abs(sh).mean(0)
                vals = v if vals is None else vals + v
        else:
            vals = np.abs(shap_values).mean(0)
        feature_importance = pd.DataFrame(
            list(zip(X_vald.columns, vals)), columns=["feature", "shap_importance"]
        )
        feature_importance.sort_values(
            by=["shap_importance"], ascending=False, inplace=True
        )
        feature_importance.to_csv(
            os.path.join(model_file_path, f"{learner_name}_shap_importance.csv"),
            index=False,
        )

    @staticmethod
    def dependence(shap_values, X_vald, model_file_path, learner_name, file_postfix=""):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            fig = plt.figure(figsize=(14, 7))
            plots_cnt = np.min([9, X_vald.shape[1]])
            cols_cnt = 3
            rows_cnt = 3
            if plots_cnt < 4:
                rows_cnt = 1
            elif plots_cnt < 7:
                rows_cnt = 2
            for i in range(plots_cnt):
                ax = fig.add_subplot(rows_cnt, cols_cnt, i + 1)
                shap.dependence_plot(
                    f"rank({i})",
                    shap_values,
                    X_vald,
                    show=False,
                    title=f"Importance #{i+1}",
                    ax=ax,
                )

            fig.tight_layout(pad=2.0)
            fig.savefig(
                os.path.join(
                    model_file_path, f"{learner_name}_shap_dependence{file_postfix}.png"
                )
            )
            plt.close("all")

    @staticmethod
    def compute(
        algorithm,
        X_train,
        y_train,
        X_validation,
        y_validation,
        model_file_path,
        learner_name,
        class_names,
        ml_task,
    ):
        if not PlotSHAP.is_available(algorithm, X_train, y_train, ml_task):
            return
        try:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                explainer = PlotSHAP.get_explainer(algorithm, X_train)
                X_vald, y_vald = PlotSHAP.get_sample(X_validation, y_validation)
                shap_values = explainer.shap_values(X_vald)

            # for binary classification SHAP may return a list with one array
            # per class; keep only the positive-class values
            expected_value = explainer.expected_value
            if ml_task == BINARY_CLASSIFICATION and isinstance(shap_values, list):
                shap_values = shap_values[1]
                expected_value = explainer.expected_value[1]

            # Summary SHAP plot
            PlotSHAP.summary(
                shap_values, X_vald, model_file_path, learner_name, class_names
            )
            # Dependence SHAP plots
            if ml_task == MULTICLASS_CLASSIFICATION:
                for t in np.unique(y_vald):
                    PlotSHAP.dependence(
                        shap_values[t],
                        X_vald,
                        model_file_path,
                        learner_name,
                        f"_class_{class_names[t]}",
                    )
            else:
                PlotSHAP.dependence(shap_values, X_vald, model_file_path, learner_name)

            # Decision SHAP plots
            df_preds = PlotSHAP.get_predictions(algorithm, X_vald, y_vald, ml_task)

            if ml_task == REGRESSION:
                PlotSHAP.decisions_regression(
                    df_preds,
                    shap_values,
                    expected_value,
                    X_vald,
                    y_vald,
                    model_file_path,
                    learner_name,
                )
            elif ml_task == BINARY_CLASSIFICATION:
                PlotSHAP.decisions_binary(
                    df_preds,
                    shap_values,
                    expected_value,
                    X_vald,
                    y_vald,
                    model_file_path,
                    learner_name,
                )
            else:
                PlotSHAP.decisions_multiclass(
                    df_preds,
                    shap_values,
                    expected_value,
                    X_vald,
                    y_vald,
                    model_file_path,
                    learner_name,
                    class_names,
                )
        except Exception as e:
            pass
            # print(
            #    f"Exception while producing SHAP explanations. {str(e)}\nContinuing ..."
            # )

    @staticmethod
    def decisions_regression(
        df_preds,
        shap_values,
        expected_value,
        X_vald,
        y_vald,
        model_file_path,
        learner_name,
    ):
        fig = plt.gcf()
        shap.decision_plot(
            expected_value,
            shap_values[df_preds.lp[:10], :],
            X_vald.loc[df_preds.index[:10]],
            show=False,
        )
        fig.tight_layout(pad=2.0)
        fig.savefig(
            os.path.join(model_file_path, f"{learner_name}_shap_worst_decisions.png")
        )
        plt.close("all")

        fig = plt.gcf()
        shap.decision_plot(
            expected_value,
            shap_values[df_preds.lp[-10:], :],
            X_vald.loc[df_preds.index[-10:]],
            show=False,
        )
        fig.tight_layout(pad=2.0)
        fig.savefig(
            os.path.join(model_file_path, f"{learner_name}_shap_best_decisions.png")
        )
        plt.close("all")

    @staticmethod
    def decisions_binary(
        df_preds,
        shap_values,
        expected_value,
        X_vald,
        y_vald,
        model_file_path,
        learner_name,
    ):
        # classes are from 0 ...
        for t in np.unique(y_vald):
            fig = plt.gcf()
            shap.decision_plot(
                expected_value,
                shap_values[df_preds[df_preds.target == t].lp[:10], :],
                X_vald.loc[df_preds[df_preds.target == t].index[:10]],
                show=False,
            )
            fig.tight_layout(pad=2.0)
            fig.savefig(
                os.path.join(
                    model_file_path,
                    f"{learner_name}_shap_class_{t}_worst_decisions.png",
                )
            )
            plt.close("all")

            fig = plt.gcf()
            shap.decision_plot(
                expected_value,
                shap_values[df_preds[df_preds.target == t].lp[-10:], :],
                X_vald.loc[df_preds[df_preds.target == t].index[-10:]],
                show=False,
            )
            fig.tight_layout(pad=2.0)
            fig.savefig(
                os.path.join(
                    model_file_path, f"{learner_name}_shap_class_{t}_best_decisions.png"
                )
            )
            plt.close("all")

    @staticmethod
    def decisions_multiclass(
        df_preds,
        shap_values,
        expected_value,
        X_vald,
        y_vald,
        model_file_path,
        learner_name,
        class_names,
    ):
        for decision_type in ["worst", "best"]:
            m = 1 if decision_type == "worst" else -1
            for i in range(4):
                fig = plt.gcf()
                shap.multioutput_decision_plot(
                    list(expected_value),
                    shap_values,
                    row_index=df_preds.lp.iloc[m * i],
                    show=False,
                    legend_labels=class_names,
                    title=f"It should be {class_names[df_preds.target.iloc[m*i]]}",
                )
                fig.tight_layout(pad=2.0)
                fig.savefig(
                    os.path.join(
                        model_file_path,
                        f"{learner_name}_sample_{i}_{decision_type}_decisions.png",
                    )
                )
                plt.close("all")

```
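
For orientation, a minimal standalone sketch of the SHAP calls that the `PlotSHAP` helpers wrap (the dataset and model below are assumptions, not taken from the repo):

```python
import shap
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor

# assumed toy setup, not from the repo
X, y = load_diabetes(return_X_y=True, as_frame=True)
model = RandomForestRegressor(n_estimators=50, random_state=0).fit(X, y)

# TreeExplainer is what PlotSHAP.get_explainer() selects for tree-based learners
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X)

# bar-type summary plot, as produced by PlotSHAP.summary()
shap.summary_plot(shap_values, X, plot_type="bar", show=False)
```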

--------------------------------------------------------------------------------
/supervised/algorithms/xgboost.py:
--------------------------------------------------------------------------------

```python
import copy
import logging

import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.base import ClassifierMixin, RegressorMixin

from supervised.algorithms.algorithm import BaseAlgorithm
from supervised.algorithms.registry import (
    BINARY_CLASSIFICATION,
    MULTICLASS_CLASSIFICATION,
    REGRESSION,
    AlgorithmsRegistry,
)
from supervised.utils.config import LOG_LEVEL
from supervised.utils.metric import (
    xgboost_eval_metric_accuracy,
    xgboost_eval_metric_average_precision,
    xgboost_eval_metric_f1,
    xgboost_eval_metric_mse,
    xgboost_eval_metric_pearson,
    xgboost_eval_metric_r2,
    xgboost_eval_metric_spearman,
    xgboost_eval_metric_user_defined,
)

logger = logging.getLogger(__name__)
logger.setLevel(LOG_LEVEL)


class XgbAlgorithmException(Exception):
    def __init__(self, message):
        super(XgbAlgorithmException, self).__init__(message)
        logger.error(message)


def time_constraint(env):
    # print("time constraint")
    pass


def xgboost_eval_metric(ml_task, automl_eval_metric):
    # the mapping is almost the same
    eval_metric_name = automl_eval_metric
    if ml_task == MULTICLASS_CLASSIFICATION:
        if automl_eval_metric == "logloss":
            eval_metric_name = "mlogloss"
    return eval_metric_name


def xgboost_objective(ml_task, automl_eval_metric):
    objective = "reg:squarederror"
    if ml_task == BINARY_CLASSIFICATION:
        objective = "binary:logistic"
    elif ml_task == MULTICLASS_CLASSIFICATION:
        objective = "multi:softprob"
    else:  # ml_task == REGRESSION
        objective = "reg:squarederror"
    return objective


class XgbAlgorithm(BaseAlgorithm):
    """
    This is a wrapper over the xgboost algorithm.
    """

    algorithm_name = "Extreme Gradient Boosting"
    algorithm_short_name = "Xgboost"

    def __init__(self, params):
        super(XgbAlgorithm, self).__init__(params)
        self.library_version = xgb.__version__

        self.explain_level = params.get("explain_level", 0)
        self.boosting_rounds = additional.get("max_rounds", 10000)
        self.max_iters = 1
        self.early_stopping_rounds = additional.get("early_stopping_rounds", 50)
        self.learner_params = {
            "tree_method": "hist",
            "booster": "gbtree",
            "objective": self.params.get("objective"),
            "eval_metric": self.params.get("eval_metric"),
            "eta": self.params.get("eta", 0.01),
            "max_depth": self.params.get("max_depth", 1),
            "min_child_weight": self.params.get("min_child_weight", 1),
            "subsample": self.params.get("subsample", 0.8),
            "colsample_bytree": self.params.get("colsample_bytree", 0.8),
            "n_jobs": self.params.get("n_jobs", -1),
            # "silent": self.params.get("silent", 1),
            "seed": self.params.get("seed", 1),
            "verbosity": 0,
        }

        if "lambda" in self.params:
            self.learner_params["lambda"] = self.params["lambda"]
        if "alpha" in self.params:
            self.learner_params["alpha"] = self.params["alpha"]

        # check https://github.com/dmlc/xgboost/issues/5637
        if self.learner_params["seed"] > 2147483647:
            self.learner_params["seed"] = self.learner_params["seed"] % 2147483647
        if "num_class" in self.params:  # multiclass classification
            self.learner_params["num_class"] = self.params.get("num_class")

        if "max_rounds" in self.params:
            self.boosting_rounds = self.params["max_rounds"]

        self.custom_eval_metric = None
        if self.params.get("eval_metric", "") == "r2":
            self.custom_eval_metric = xgboost_eval_metric_r2
        elif self.params.get("eval_metric", "") == "spearman":
            self.custom_eval_metric = xgboost_eval_metric_spearman
        elif self.params.get("eval_metric", "") == "pearson":
            self.custom_eval_metric = xgboost_eval_metric_pearson
        elif self.params.get("eval_metric", "") == "f1":
            self.custom_eval_metric = xgboost_eval_metric_f1
        elif self.params.get("eval_metric", "") == "average_precision":
            self.custom_eval_metric = xgboost_eval_metric_average_precision
        elif self.params.get("eval_metric", "") == "accuracy":
            self.custom_eval_metric = xgboost_eval_metric_accuracy
        elif self.params.get("eval_metric", "") == "mse":
            self.custom_eval_metric = xgboost_eval_metric_mse
        elif self.params.get("eval_metric", "") == "user_defined_metric":
            self.custom_eval_metric = xgboost_eval_metric_user_defined

        logger.debug("XgbLearner __init__")

    """
    def get_boosting_rounds(self, dtrain, evals, esr, max_time):
        if max_time is None:
            return self.boosting_rounds

        start_time = time.time()
        evals_result = {}
        model = xgb.train(
            self.learner_params,
            dtrain,
            2,
            evals=evals,
            early_stopping_rounds=esr,
            evals_result=evals_result,
            verbose_eval=False,
        )
        time_1_iter = (time.time() - start_time) / 2.0

        # 2.0 is just a scaling factor
        # purely heuristic
        iters = int(max_time / time_1_iter * 2.0)
        iters = max(iters, 100)
        iters = min(iters, 10000)
        return iters
    """

    def fit(
        self,
        X,
        y,
        sample_weight=None,
        X_validation=None,
        y_validation=None,
        sample_weight_validation=None,
        log_to_file=None,
        max_time=None,
    ):
        dtrain = xgb.DMatrix(
            X.values if isinstance(X, pd.DataFrame) else X,
            label=y,
            missing=np.NaN,
            weight=sample_weight,
        )
        
        if X_validation is not None and y_validation is not None:       
            dvalidation = xgb.DMatrix(
                X_validation.values
                if isinstance(X_validation, pd.DataFrame)
                else X_validation,
                label=y_validation,
                missing=np.NaN,
                weight=sample_weight_validation,
            )
        else:
            dvalidation = None
            
        evals_result = {}

        evals = []
        esr = None
        if X_validation is not None and y_validation is not None:
            evals = [(dtrain, "train"), (dvalidation, "validation")]
            esr = self.early_stopping_rounds

        # disabled for now, we don't have a better idea how to handle the time limit ...
        # it looks like it is better not to limit the algorithm
        # and just let it converge ...
        # boosting_rounds = self.get_boosting_rounds(dtrain, evals, esr, max_time)

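        # a custom (Python) eval metric is passed via `custom_metric`,
        # so the built-in "eval_metric" entry has to be removed from the params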
        if self.custom_eval_metric is not None:
            del self.learner_params["eval_metric"]

        self.model = xgb.train(
            self.learner_params,
            dtrain,
            self.boosting_rounds,
            evals=evals,
            early_stopping_rounds=esr,
            evals_result=evals_result,
            verbose_eval=False,
            custom_metric=self.custom_eval_metric
            # callbacks=[time_constraint] # callback slows down by factor ~8
        )

        del dtrain
        del dvalidation

        if log_to_file is not None:
            metric_name = list(evals_result["train"].keys())[-1]

            result = pd.DataFrame(
                {
                    "iteration": range(len(evals_result["train"][metric_name])),
                    "train": evals_result["train"][metric_name],
                    "validation": evals_result["validation"][metric_name],
                }
            )
            # if it is a custom metric
            # (always minimized internally)
            # we need to flip the sign back
            if metric_name in [
                "r2",
                "spearman",
                "pearson",
                "f1",
                "average_precision",
                "accuracy",
            ]:
                result["train"] *= -1.0
                result["validation"] *= -1.0

            result.to_csv(log_to_file, index=False, header=False)

        if self.params["ml_task"] != REGRESSION:
            self.classes_ = np.unique(y)

        # fix high memory consumption in xgboost,
        # waiting for release with fix
        # https://github.com/dmlc/xgboost/issues/5474
        """
        # disable, for now all learners are saved to hard disk and then deleted from RAM
        with tempfile.NamedTemporaryFile() as tmp:
            self.model.save_model(tmp.name)
            del self.model
            self.model = xgb.Booster()
            self.model.load_model(tmp.name)
        """

    def is_fitted(self):
        return self.model is not None

    def predict(self, X):
        self.reload()

        if self.model is None:
            raise XgbAlgorithmException("Xgboost model is None")

        dtrain = xgb.DMatrix(
            X.values if isinstance(X, pd.DataFrame) else X, missing=np.NaN
        )
        # xgboost > 2.0.0 version
        if hasattr(self.model, "best_iteration"):
            a = self.model.predict(
                dtrain, iteration_range=(0, self.model.best_iteration + 1)
            )
        else:
            a = self.model.predict(dtrain)

        return a

    def copy(self):
        return copy.deepcopy(self)

    def save(self, model_file_path):
        self.model.save_model(model_file_path)
        self.model_file_path = model_file_path
        logger.debug("XgbAlgorithm save model to %s" % model_file_path)

    def load(self, model_file_path):
        logger.debug("XgbLearner load model from %s" % model_file_path)
        self.model = xgb.Booster()  # init model
        self.model.load_model(model_file_path)
        self.model_file_path = model_file_path

    def file_extension(self):
        # we need to keep models as json files
        # to keep information about best_iteration
        return "xgboost.json"

    def get_metric_name(self):
        metric = self.params.get("eval_metric")
        if metric is None:
            return None
        if metric == "mlogloss":
            return "logloss"
        return metric


# For binary classification the target should be 0 or 1. There should be no NaNs in the target.
xgb_bin_class_params = {
    "objective": ["binary:logistic"],
    "eta": [0.05, 0.075, 0.1, 0.15],
    "max_depth": [4, 5, 6, 7, 8, 9],
    "min_child_weight": [1, 5, 10, 25, 50],
    "subsample": [0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
    "colsample_bytree": [0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
}

classification_bin_default_params = {
    "objective": "binary:logistic",
    "eta": 0.075,
    "max_depth": 6,
    "min_child_weight": 1,
    "subsample": 1.0,
    "colsample_bytree": 1.0,
}

xgb_regression_params = dict(xgb_bin_class_params)
xgb_regression_params["objective"] = ["reg:squarederror"]
# xgb_regression_params["eval_metric"] = ["rmse", "mae", "mape"]
xgb_regression_params["max_depth"] = [4, 5, 6, 7, 8, 9]


xgb_multi_class_params = dict(xgb_bin_class_params)
xgb_multi_class_params["objective"] = ["multi:softprob"]
# xgb_multi_class_params["eval_metric"] = ["mlogloss"]

classification_multi_default_params = {
    "objective": "multi:softprob",
    "eta": 0.075,
    "max_depth": 6,
    "min_child_weight": 1,
    "subsample": 1.0,
    "colsample_bytree": 1.0,
}


regression_default_params = {
    "objective": "reg:squarederror",
    "eta": 0.075,
    "max_depth": 6,
    "min_child_weight": 1,
    "subsample": 1.0,
    "colsample_bytree": 1.0,
}

additional = {
    "max_rounds": 10000,
    "early_stopping_rounds": 50,
    "max_rows_limit": None,
    "max_cols_limit": None,
}
required_preprocessing = [
    "missing_values_inputation",
    "convert_categorical",
    "datetime_transform",
    "text_transform",
    "target_as_integer",
]


class XgbClassifier(ClassifierMixin, XgbAlgorithm):
    pass


AlgorithmsRegistry.add(
    BINARY_CLASSIFICATION,
    XgbClassifier,
    xgb_bin_class_params,
    required_preprocessing,
    additional,
    classification_bin_default_params,
)

AlgorithmsRegistry.add(
    MULTICLASS_CLASSIFICATION,
    XgbClassifier,
    xgb_multi_class_params,
    required_preprocessing,
    additional,
    classification_multi_default_params,
)

regression_required_preprocessing = [
    "missing_values_inputation",
    "convert_categorical",
    "datetime_transform",
    "text_transform",
    "target_scale",
]


class XgbRegressor(RegressorMixin, XgbAlgorithm):
    pass


AlgorithmsRegistry.add(
    REGRESSION,
    XgbRegressor,
    xgb_regression_params,
    regression_required_preprocessing,
    additional,
    regression_default_params,
)

```
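
A short sketch of the raw xgboost calls that `XgbAlgorithm.fit()` and `predict()` boil down to (synthetic data and hyper-parameters are assumed):

```python
import numpy as np
import xgboost as xgb

# synthetic data, assumed values
X = np.random.rand(200, 5)
y = np.random.randint(0, 2, size=200)

dtrain = xgb.DMatrix(X, label=y)
params = {"objective": "binary:logistic", "eval_metric": "logloss", "eta": 0.1, "max_depth": 6}

evals_result = {}
booster = xgb.train(
    params,
    dtrain,
    num_boost_round=50,
    evals=[(dtrain, "train")],
    evals_result=evals_result,
    verbose_eval=False,
)

# as in XgbAlgorithm.predict(): respect best_iteration when early stopping was used
best = getattr(booster, "best_iteration", None)
if best is None:
    preds = booster.predict(dtrain)
else:
    preds = booster.predict(dtrain, iteration_range=(0, best + 1))
```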

--------------------------------------------------------------------------------
/tests/tests_automl/test_automl.py:
--------------------------------------------------------------------------------

```python
import os
import shutil
import unittest
from pathlib import Path

import numpy as np
import pandas as pd
import pytest
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline

from supervised import AutoML
from supervised.exceptions import AutoMLException

iris = datasets.load_iris()
housing = datasets.fetch_california_housing()
# limit data size for faster tests
housing.data = housing.data[:500]
housing.target = housing.target[:500]
breast_cancer = datasets.load_breast_cancer()


@pytest.mark.usefixtures("data_folder")
class AutoMLTest(unittest.TestCase):
    automl_dir = "AutoMLTest"
    data_folder: Path

    def tearDown(self):
        shutil.rmtree(self.automl_dir, ignore_errors=True)

    def setUp(self):
        shutil.rmtree(self.automl_dir, ignore_errors=True)

    def test_new_directory(self):
        """Directory does not exist, create it"""
        # Assert directory does not exist
        self.assertTrue(not os.path.exists(self.automl_dir))
        # Create model with dir
        model = AutoML(results_path=self.automl_dir)
        # Generate data
        X, y = datasets.make_classification(n_samples=30)
        # Fit data
        model.fit(X, y)  # AutoML only validates constructor params on `fit()` call
        # Assert directory was created
        self.assertTrue(os.path.exists(self.automl_dir))

    def test_empty_directory(self):
        """Directory exists and is empty, use it"""
        # Assert directory does not exist
        self.assertTrue(not os.path.exists(self.automl_dir))
        # Make dir
        os.mkdir(self.automl_dir)
        # Assert dir exists
        self.assertTrue(os.path.exists(self.automl_dir))
        # Create automl with dir
        model = AutoML(results_path=self.automl_dir)
        # Generate data
        X, y = datasets.make_classification(n_samples=30)
        # Fit data
        model.fit(X, y)  # AutoML only validates constructor params on `fit()` call
        self.assertTrue(os.path.exists(self.automl_dir))

    def test_not_empty_directory(self):
        """
        Directory exists and is not empty,
        there is no params.json file in it, don't use it, raise an exception
        """
        # Assert directory does not exist
        self.assertTrue(not os.path.exists(self.automl_dir))
        # Create directory
        os.mkdir(self.automl_dir)
        # Write some content to directory
        open(os.path.join(self.automl_dir, "test.file"), "w").close()
        # Assert directory exists
        self.assertTrue(os.path.exists(self.automl_dir))
        # Generate data
        X, y = datasets.make_classification(n_samples=30)
        # Assert that an Exception is raised
        with self.assertRaises(AutoMLException) as context:
            a = AutoML(results_path=self.automl_dir)
            a.fit(X, y)  # AutoML only validates constructor params on `fit()` call

        self.assertTrue("not empty" in str(context.exception))

    def test_use_directory_if_non_empty_exists_with_params_json(self):
        """
        Directory exists and is not empty,
        there is params.json in it, try to load it,
        raise exception because of fake params.json
        """
        # Assert directory does not exist
        self.assertTrue(not os.path.exists(self.automl_dir))
        # Create dir
        os.mkdir(self.automl_dir)
        # Write `params.json` to directory
        open(os.path.join(self.automl_dir, "params.json"), "w").close()
        # Assert directory exists
        self.assertTrue(os.path.exists(self.automl_dir))
        # Generate data
        X, y = datasets.make_classification(n_samples=30)
        with self.assertRaises(AutoMLException) as context:
            a = AutoML(results_path=self.automl_dir)
            a.predict(X)  # AutoML tries to load on predict call
        self.assertTrue("Cannot load" in str(context.exception))

    def test_get_params(self):
        """
        Passes params in AutoML constructor and uses `get_params()` after fitting.
        Initial params must be equal to the ones returned by `get_params()`.
        """
        # Create model
        model = AutoML(
            hill_climbing_steps=3, start_random_models=1, results_path=self.automl_dir
        )
        # Get params before fit
        params_before_fit = model.get_params()
        # Generate data
        X, y = datasets.make_classification(n_samples=30)
        # Fit data
        model.fit(X, y)
        # Get params after fit
        params_after_fit = model.get_params()
        # Assert before and after params are equal
        self.assertEqual(params_before_fit, params_after_fit)

    def test_scikit_learn_pipeline_integration(self):
        """
        Tests if AutoML is working on a scikit-learn's pipeline
        """
        # Create dataset
        X, y = datasets.make_classification(n_samples=30)
        # apply PCA to X
        new_X = PCA(random_state=0).fit_transform(X)
        # Create default model
        default_model = AutoML(
            algorithms=["Linear"], random_state=0, results_path=self.automl_dir
        )
        # Fit default model with transformed X and y, and predict transformed X
        y_pred_default = default_model.fit(new_X, y).predict(new_X)

        # Create pipeline with PCA and AutoML
        pipeline = make_pipeline(
            PCA(random_state=0), AutoML(algorithms=["Linear"], random_state=0)
        )
        # Fit with original X and y and predict X
        y_pred_pipe = pipeline.fit(X, y).predict(X)
        # y_pred_default must be equal to y_pred_pipe
        self.assertTrue((y_pred_pipe == y_pred_default).all())

    def test_predict_proba_in_regression(self):
        model = AutoML(
            explain_level=0, verbose=0, random_state=1, results_path=self.automl_dir
        )
        model.fit(housing.data, housing.target)
        with self.assertRaises(AutoMLException) as context:
            # Try to call predict_proba in regression task
            model.predict_proba(housing.data)

    def test_iris_dataset(self):
        """Tests AutoML in the iris dataset (Multiclass classification)"""
        model = AutoML(
            explain_level=0, verbose=0, random_state=1, results_path=self.automl_dir
        )
        score = model.fit(iris.data, iris.target).score(iris.data, iris.target)
        self.assertGreater(score, 0.5)

    def test_housing_dataset(self):
        """Tests AutoML in the housing dataset (Regression)"""
        model = AutoML(
            explain_level=0, verbose=0, random_state=1, results_path=self.automl_dir
        )
        score = model.fit(housing.data, housing.target).score(
            housing.data, housing.target
        )
        self.assertGreater(score, 0.5)

    def test_breast_cancer_dataset(self):
        """Tests AutoML in the breast cancer (binary classification)"""
        model = AutoML(
            explain_level=0, verbose=0, random_state=1, results_path=self.automl_dir
        )
        score = model.fit(breast_cancer.data, breast_cancer.target).score(
            breast_cancer.data, breast_cancer.target
        )
        self.assertGreater(score, 0.5)

    def test_titanic_dataset(self):
        """Tests AutoML on the Titanic dataset (binary classification) with categorical features"""
        data_folder = self.data_folder
        automl = AutoML(
            algorithms=["Xgboost"], mode="Explain", results_path=self.automl_dir
        )

        df = pd.read_csv((data_folder / "Titanic/train.csv"))

        X = df[df.columns[2:]]
        y = df["Survived"]

        automl.fit(X, y)

        test = pd.read_csv(data_folder / "Titanic/test_with_Survived.csv")
        test_cols = [
            "Parch",
            "Ticket",
            "Fare",
            "Pclass",
            "Name",
            "Sex",
            "Age",
            "SibSp",
            "Cabin",
            "Embarked",
        ]
        score = automl.score(test[test_cols], test["Survived"])
        self.assertGreater(score, 0.5)

    def test_score_without_y(self):
        """Tests the use of `score()` without passing y. Should raise AutoMLException"""
        model = AutoML(
            explain_level=0, verbose=0, random_state=1, results_path=self.automl_dir
        )
        # Assert that an Exception is raised
        with self.assertRaises(AutoMLException) as context:
            # Try to score without passing 'y'
            score = model.fit(breast_cancer.data, breast_cancer.target).score(
                breast_cancer.data
            )

        self.assertTrue("y must be specified" in str(context.exception))

    def test_no_constructor_args(self):
        """Tests the use of AutoML without passing any args. Should work without any arguments"""
        # Create model with no arguments
        model = AutoML()
        model.results_path = self.automl_dir
        # Fit and score; should work without any constructor arguments
        score = model.fit(iris.data, iris.target).score(iris.data, iris.target)
        self.assertGreater(score, 0.5)

    def test_fit_returns_self(self):
        """Tests if the `fit()` method returns `self`. This allows to quickly implement one-liners with AutoML"""
        model = AutoML()
        model.results_path = self.automl_dir
        self.assertTrue(
            isinstance(model.fit(iris.data, iris.target), AutoML),
            "`fit()` method must return 'self'",
        )

    def test_invalid_mode(self):
        model = AutoML(explain_level=0, verbose=0, results_path=self.automl_dir)
        param = {"mode": "invalid_mode"}
        model.set_params(**param)
        with self.assertRaises(ValueError) as context:
            model.fit(iris.data, iris.target)

    def test_invalid_ml_task(self):
        model = AutoML(explain_level=0, verbose=0, results_path=self.automl_dir)
        param = {"ml_task": "invalid_task"}
        model.set_params(**param)
        with self.assertRaises(ValueError) as context:
            model.fit(iris.data, iris.target)

    def test_invalid_results_path(self):
        model = AutoML(explain_level=0, verbose=0, results_path=self.automl_dir)
        param = {"results_path": 2}
        model.set_params(**param)
        with self.assertRaises(ValueError) as context:
            model.fit(iris.data, iris.target)

    def test_invalid_total_time_limit(self):
        model = AutoML(explain_level=0, verbose=0, results_path=self.automl_dir)
        param = {"total_time_limit": -1}
        model.set_params(**param)
        with self.assertRaises(ValueError) as context:
            model.fit(iris.data, iris.target)

    def test_invalid_model_time_limit(self):
        model = AutoML(explain_level=0, verbose=0, results_path=self.automl_dir)
        param = {"model_time_limit": -1}
        model.set_params(**param)
        with self.assertRaises(ValueError) as context:
            model.fit(iris.data, iris.target)

    def test_invalid_algorithm_name(self):
        model = AutoML(explain_level=0, verbose=0, results_path=self.automl_dir)
        param = {"algorithms": ["Baseline", "Neural Netrk"]}
        model.set_params(**param)
        with self.assertRaises(ValueError) as context:
            model.fit(iris.data, iris.target)

    def test_invalid_train_ensemble(self):
        model = AutoML(explain_level=0, verbose=0, results_path=self.automl_dir)
        param = {"train_ensemble": "not bool"}
        model.set_params(**param)
        with self.assertRaises(ValueError) as context:
            model.fit(iris.data, iris.target)

    def test_invalid_stack_models(self):
        model = AutoML(explain_level=0, verbose=0, results_path=self.automl_dir)
        param = {"stack_models": "not bool"}
        model.set_params(**param)
        with self.assertRaises(ValueError) as context:
            model.fit(iris.data, iris.target)

    def test_invalid_eval_metric(self):
        model = AutoML(explain_level=0, verbose=0, results_path=self.automl_dir)
        param = {"eval_metric": "not_real_metric"}
        model.set_params(**param)
        with self.assertRaises(ValueError) as context:
            model.fit(iris.data, iris.target)

    def test_invalid_validation_strategy(self):
        model = AutoML(explain_level=0, verbose=0, results_path=self.automl_dir)
        param = {"validation_strategy": "test"}
        model.set_params(**param)
        with self.assertRaises(ValueError) as context:
            model.fit(iris.data, iris.target)

    def test_invalid_verbose(self):
        model = AutoML(explain_level=0, verbose=0, results_path=self.automl_dir)
        param = {"verbose": -1}
        model.set_params(**param)
        with self.assertRaises(ValueError) as context:
            model.fit(iris.data, iris.target)

    def test_too_small_time_limit(self):
        rows = 1000000
        X = np.random.uniform(size=(rows, 100))
        y = np.random.randint(0, 2, size=(rows,))

        automl = AutoML(
            results_path=self.automl_dir, total_time_limit=1, train_ensemble=False
        )
        with self.assertRaises(AutoMLException) as context:
            automl.fit(X, y)

```
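
The patterns exercised by these tests boil down to a few lines; a minimal sketch (synthetic data, assumed results directory name):

```python
from sklearn import datasets

from supervised import AutoML

X, y = datasets.make_classification(n_samples=100)

# the results directory must either not exist, be empty, or hold a previous AutoML run
automl = AutoML(results_path="AutoML_demo", algorithms=["Linear"], explain_level=0)
automl.fit(X, y)           # fit() returns self, so calls can be chained
print(automl.score(X, y))  # score on the training data
preds = automl.predict(X)
```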

--------------------------------------------------------------------------------
/supervised/utils/metric.py:
--------------------------------------------------------------------------------

```python
import logging

log = logging.getLogger(__name__)

import numpy as np
import pandas as pd
import scipy as sp
from sklearn.metrics import (
    accuracy_score,
    average_precision_score,
    f1_score,
    log_loss,
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
    mean_squared_log_error,
    r2_score,
    roc_auc_score,
)


def logloss(y_true, y_predicted, sample_weight=None):
    # convert predicted values to float32 to avoid warnings
    ll = log_loss(y_true, y_predicted.astype(np.float32), sample_weight=sample_weight)
    return ll


def rmse(y_true, y_predicted, sample_weight=None):
    val = mean_squared_error(y_true, y_predicted, sample_weight=sample_weight)
    return np.sqrt(val) if val > 0 else -np.Inf


def rmsle(y_true, y_predicted, sample_weight=None):
    val = mean_squared_log_error(y_true, y_predicted, sample_weight=sample_weight)
    return np.sqrt(val) if val > 0 else -np.Inf


def negative_auc(y_true, y_predicted, sample_weight=None):
    val = roc_auc_score(y_true, y_predicted, sample_weight=sample_weight)
    return -1.0 * val


def negative_r2(y_true, y_predicted, sample_weight=None):
    val = r2_score(y_true, y_predicted, sample_weight=sample_weight)
    return -1.0 * val


def negative_f1(y_true, y_predicted, sample_weight=None):
    if isinstance(y_true, pd.DataFrame):
        y_true = np.array(y_true)
    if isinstance(y_predicted, pd.DataFrame):
        y_predicted = np.array(y_predicted)

    if len(y_predicted.shape) == 2 and y_predicted.shape[1] == 1:
        y_predicted = y_predicted.ravel()

    average = None
    if len(y_predicted.shape) == 1:
        y_predicted = (y_predicted > 0.5).astype(int)
        average = "binary"
    else:
        y_predicted = np.argmax(y_predicted, axis=1)
        average = "micro"

    val = f1_score(y_true, y_predicted, sample_weight=sample_weight, average=average)

    return -val


def negative_accuracy(y_true, y_predicted, sample_weight=None):
    if isinstance(y_true, pd.DataFrame):
        y_true = np.array(y_true)
    if isinstance(y_predicted, pd.DataFrame):
        y_predicted = np.array(y_predicted)

    if len(y_predicted.shape) == 2 and y_predicted.shape[1] == 1:
        y_predicted = y_predicted.ravel()

    if len(y_predicted.shape) == 1:
        y_predicted = (y_predicted > 0.5).astype(int)
    else:
        y_predicted = np.argmax(y_predicted, axis=1)

    val = accuracy_score(y_true, y_predicted, sample_weight=sample_weight)

    return -val


def negative_average_precision(y_true, y_predicted, sample_weight=None):
    if isinstance(y_true, pd.DataFrame):
        y_true = np.array(y_true)
    if isinstance(y_predicted, pd.DataFrame):
        y_predicted = np.array(y_predicted)

    val = average_precision_score(y_true, y_predicted, sample_weight=sample_weight)

    return -val


def negative_spearman(y_true, y_predicted, sample_weight=None):
    # sample weight is ignored
    c, _ = sp.stats.spearmanr(y_true, y_predicted)
    return -c


def spearman(y_true, y_predicted, sample_weight=None):
    # sample weight is ignored
    c, _ = sp.stats.spearmanr(y_true, y_predicted)
    return c


def negative_pearson(y_true, y_predicted, sample_weight=None):
    # sample weight is ignored
    if isinstance(y_true, pd.DataFrame):
        y_true = np.array(y_true).ravel()
    if isinstance(y_predicted, pd.DataFrame):
        y_predicted = np.array(y_predicted).ravel()
    return -np.corrcoef(y_true, y_predicted)[0, 1]


def pearson(y_true, y_predicted, sample_weight=None):
    return -negative_pearson(y_true, y_predicted, sample_weight)


class MetricException(Exception):
    def __init__(self, message):
        Exception.__init__(self, message)
        log.error(message)


def xgboost_eval_metric_r2(preds, dtrain):
    # Xgboost needs to minimize eval_metric
    target = dtrain.get_label()
    weight = dtrain.get_weight()
    if len(weight) == 0:
        weight = None
    return "r2", -r2_score(target, preds, sample_weight=weight)


def xgboost_eval_metric_spearman(preds, dtrain):
    # Xgboost needs to minimize eval_metric
    target = dtrain.get_label()
    return "spearman", negative_spearman(target, preds)


def xgboost_eval_metric_pearson(preds, dtrain):
    # Xgboost needs to minimize eval_metric
    target = dtrain.get_label()
    return "pearson", negative_pearson(target, preds)


def xgboost_eval_metric_f1(preds, dtrain):
    # Xgboost needs to minimize eval_metric
    target = dtrain.get_label()
    weight = dtrain.get_weight()
    if len(weight) == 0:
        weight = None
    return "f1", negative_f1(target, preds, weight)


def xgboost_eval_metric_average_precision(preds, dtrain):
    # Xgboost needs to minimize eval_metric
    target = dtrain.get_label()
    weight = dtrain.get_weight()
    if len(weight) == 0:
        weight = None
    return "average_precision", negative_average_precision(target, preds, weight)


def xgboost_eval_metric_accuracy(preds, dtrain):
    # Xgboost needs to minimize eval_metric
    target = dtrain.get_label()
    weight = dtrain.get_weight()
    if len(weight) == 0:
        weight = None
    return "accuracy", negative_accuracy(target, preds, weight)


def xgboost_eval_metric_mse(preds, dtrain):
    # Xgboost needs to minimize eval_metric
    target = dtrain.get_label()
    weight = dtrain.get_weight()
    if len(weight) == 0:
        weight = None
    return "mse", mean_squared_error(target, preds, sample_weight=weight)


def lightgbm_eval_metric_r2(preds, dtrain):
    target = dtrain.get_label()
    weight = dtrain.get_weight()
    return "r2", r2_score(target, preds, sample_weight=weight), True


def lightgbm_eval_metric_spearman(preds, dtrain):
    target = dtrain.get_label()
    return "spearman", -negative_spearman(target, preds), True


def lightgbm_eval_metric_pearson(preds, dtrain):
    target = dtrain.get_label()
    return "pearson", -negative_pearson(target, preds), True


def lightgbm_eval_metric_f1(preds, dtrain):
    target = dtrain.get_label()
    weight = dtrain.get_weight()

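    # for multi-class tasks LightGBM passes predictions as a flat, column-major
    # array; reshape it back to (n_samples, n_classes) before computing F1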
    unique_targets = np.unique(target)
    if len(unique_targets) > 2:
        cols = len(unique_targets)
        rows = int(preds.shape[0] / len(unique_targets))
        preds = np.reshape(preds, (rows, cols), order="F")

    return "f1", -negative_f1(target, preds, weight), True


def lightgbm_eval_metric_average_precision(preds, dtrain):
    target = dtrain.get_label()
    weight = dtrain.get_weight()

    return "average_precision", -negative_average_precision(target, preds, weight), True


def lightgbm_eval_metric_accuracy(preds, dtrain):
    target = dtrain.get_label()
    weight = dtrain.get_weight()

    return "accuracy", -negative_accuracy(target, preds, weight), True


class CatBoostEvalMetricSpearman(object):
    def get_final_error(self, error, weight):
        return error

    def is_max_optimal(self):
        return True

    def evaluate(self, approxes, target, weight):
        assert len(approxes) == 1
        assert len(target) == len(approxes[0])

        preds = np.array(approxes[0])
        target = np.array(target)

        return -negative_spearman(target, preds), 0


class CatBoostEvalMetricPearson(object):
    def get_final_error(self, error, weight):
        return error

    def is_max_optimal(self):
        return True

    def evaluate(self, approxes, target, weight):
        assert len(approxes) == 1
        assert len(target) == len(approxes[0])

        preds = np.array(approxes[0])
        target = np.array(target)

        return -negative_pearson(target, preds), 0


class CatBoostEvalMetricAveragePrecision(object):
    def get_final_error(self, error, weight):
        return error

    def is_max_optimal(self):
        return True

    def evaluate(self, approxes, target, weight):
        assert len(approxes) == 1
        assert len(target) == len(approxes[0])

        preds = np.array(approxes[0])
        target = np.array(target)
        if weight is not None:
            weight = np.array(weight)

        return -negative_average_precision(target, preds, weight), 0


class CatBoostEvalMetricMSE(object):
    def get_final_error(self, error, weight):
        return error

    def is_max_optimal(self):
        return False

    def evaluate(self, approxes, target, weight):
        assert len(approxes) == 1
        assert len(target) == len(approxes[0])

        preds = np.array(approxes[0])
        target = np.array(target)
        if weight is not None:
            weight = np.array(weight)

        return mean_squared_error(target, preds, sample_weight=weight), 0


class UserDefinedEvalMetric:
    # should always minimize
    eval_metric = mean_squared_error  # set the default

    def set_metric(self, feval):
        UserDefinedEvalMetric.eval_metric = feval

    def __call__(self, y_true, y_predicted, sample_weight=None):
        return UserDefinedEvalMetric.eval_metric(y_true, y_predicted, sample_weight)


def xgboost_eval_metric_user_defined(preds, dtrain):
    target = dtrain.get_label()
    weight = dtrain.get_weight()
    if len(weight) == 0:
        weight = None
    metric = UserDefinedEvalMetric()
    return "user_defined_metric", metric(target, preds, sample_weight=weight)


def lightgbm_eval_metric_user_defined(preds, dtrain):
    target = dtrain.get_label()
    weight = dtrain.get_weight()
    metric = UserDefinedEvalMetric()
    return "user_defined_metric", metric(target, preds, sample_weight=weight), False


class CatBoostEvalMetricUserDefined(object):
    def get_final_error(self, error, weight):
        return error

    def is_max_optimal(self):
        return False

    def evaluate(self, approxes, target, weight):
        assert len(approxes) == 1
        assert len(target) == len(approxes[0])

        preds = np.array(approxes[0])
        target = np.array(target)
        if weight is not None:
            weight = np.array(weight)

        metric = UserDefinedEvalMetric()
        return metric(target, preds, sample_weight=weight), 0


class Metric(object):
    def __init__(self, params):
        if params is None:
            raise MetricException("Metric params not defined")
        self.params = params
        self.name = self.params.get("name")
        if self.name is None:
            raise MetricException("Metric name not defined")

        self.minimize_direction = self.name in [
            "logloss",
            "auc",  # negative auc
            "rmse",
            "mae",
            "mse",
            "r2",  # negative r2
            "mape",
            "spearman",  # negative
            "pearson",  # negative
            "f1",  # negative
            "average_precision",  # negative
            "accuracy",  # negative
            "user_defined_metric",
        ]
        if self.name == "logloss":
            self.metric = logloss
        elif self.name == "auc":
            self.metric = negative_auc
        elif self.name == "acc":
            self.metric = accuracy_score
        elif self.name == "rmse":
            self.metric = rmse
        elif self.name == "mse":
            self.metric = mean_squared_error
        elif self.name == "mae":
            self.metric = mean_absolute_error
        elif self.name == "r2":
            self.metric = negative_r2
        elif self.name == "mape":
            self.metric = mean_absolute_percentage_error
        elif self.name == "spearman":
            self.metric = negative_spearman
        elif self.name == "pearson":
            self.metric = negative_pearson
        elif self.name == "f1":
            self.metric = negative_f1
        elif self.name == "average_precision":
            self.metric = negative_average_precision
        elif self.name == "accuracy":
            self.metric = negative_accuracy
        elif self.name == "user_defined_metric":
            self.metric = UserDefinedEvalMetric.eval_metric
        # elif self.name == "rmsle": # need to update target preprocessing
        #    self.metric = rmsle     # to assure that target is not negative ...
        else:
            raise MetricException(f"Unknown metric '{self.name}'")

    def __call__(self, y_true, y_predicted, sample_weight=None):
        return self.metric(y_true, y_predicted, sample_weight=sample_weight)

    def improvement(self, previous, current):
        if self.minimize_direction:
            return current < previous
        return current > previous

    def get_maximum(self):
        if self.minimize_direction:
            return 10e12
        else:
            return -10e12

    def worst_value(self):
        if self.minimize_direction:
            return np.Inf
        return -np.Inf

    def get_minimize_direction(self):
        return self.minimize_direction

    def is_negative(self):
        return self.name in [
            "auc",
            "r2",
            "spearman",
            "pearson",
            "f1",
            "average_precision",
            "accuracy",
        ]

    @staticmethod
    def optimize_negative(metric_name):
        return metric_name in [
            "auc",
            "r2",
            "spearman",
            "pearson",
            "f1",
            "average_precision",
            "accuracy",
        ]

```
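
A quick sketch of how the `Metric` wrapper above is used (example values are made up):

```python
import numpy as np

from supervised.utils.metric import Metric

rmse_metric = Metric({"name": "rmse"})
y_true = np.array([1.0, 2.0, 3.0])
y_pred = np.array([1.1, 1.9, 3.2])

print(rmse_metric(y_true, y_pred))          # plain RMSE, lower is better
print(rmse_metric.improvement(0.50, 0.30))  # True: RMSE is minimized

# maximized metrics (auc, r2, f1, ...) are stored negated,
# so internally everything is minimized
auc_metric = Metric({"name": "auc"})
print(auc_metric.is_negative())             # True
```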

--------------------------------------------------------------------------------
/supervised/algorithms/catboost.py:
--------------------------------------------------------------------------------

```python
import copy
import logging
import time

import numpy as np
import pandas as pd
from sklearn.base import ClassifierMixin, RegressorMixin

from supervised.algorithms.algorithm import BaseAlgorithm
from supervised.algorithms.registry import (
    BINARY_CLASSIFICATION,
    MULTICLASS_CLASSIFICATION,
    REGRESSION,
    AlgorithmsRegistry,
)
from supervised.preprocessing.preprocessing_utils import PreprocessingUtils
from supervised.utils.config import LOG_LEVEL
from supervised.utils.metric import (
    CatBoostEvalMetricAveragePrecision,
    CatBoostEvalMetricMSE,
    CatBoostEvalMetricPearson,
    CatBoostEvalMetricSpearman,
    CatBoostEvalMetricUserDefined,
)

logger = logging.getLogger(__name__)
logger.setLevel(LOG_LEVEL)

import catboost
from catboost import CatBoostClassifier, CatBoostRegressor, Pool


def catboost_eval_metric(ml_task, eval_metric):
    if eval_metric == "user_defined_metric":
        return eval_metric
    metric_name_mapping = {
        BINARY_CLASSIFICATION: {
            "auc": "AUC",
            "logloss": "Logloss",
            "f1": "F1",
            "average_precision": "average_precision",
            "accuracy": "Accuracy",
        },
        MULTICLASS_CLASSIFICATION: {
            "logloss": "MultiClass",
            "f1": "TotalF1:average=Micro",
            "accuracy": "Accuracy",
        },
        REGRESSION: {
            "rmse": "RMSE",
            "mse": "mse",
            "mae": "MAE",
            "mape": "MAPE",
            "r2": "R2",
            "spearman": "spearman",
            "pearson": "pearson",
        },
    }
    return metric_name_mapping[ml_task][eval_metric]


def catboost_objective(ml_task, eval_metric):
    objective = "RMSE"
    if ml_task == BINARY_CLASSIFICATION:
        objective = "Logloss"
    elif ml_task == MULTICLASS_CLASSIFICATION:
        objective = "MultiClass"
    else:  # ml_task == REGRESSION
        objective = catboost_eval_metric(REGRESSION, eval_metric)
        if objective in [
            "mse",
            "R2",
            "spearman",
            "pearson",
            "user_defined_metric",
        ]:  # can't optimize these directly
            objective = "RMSE"
    return objective


class CatBoostAlgorithm(BaseAlgorithm):
    algorithm_name = "CatBoost"
    algorithm_short_name = "CatBoost"
    warmup_iterations = 20

    def __init__(self, params):
        super(CatBoostAlgorithm, self).__init__(params)
        self.library_version = catboost.__version__
        self.snapshot_file_path = "training_snapshot"

        self.explain_level = params.get("explain_level", 0)
        self.rounds = additional.get("max_rounds", 10000)
        self.max_iters = 1
        self.early_stopping_rounds = additional.get("early_stopping_rounds", 50)

        Algo = CatBoostClassifier
        loss_function = "Logloss"
        if self.params["ml_task"] == BINARY_CLASSIFICATION:
            loss_function = self.params.get("loss_function", "Logloss")
        elif self.params["ml_task"] == MULTICLASS_CLASSIFICATION:
            loss_function = self.params.get("loss_function", "MultiClass")
        elif self.params["ml_task"] == REGRESSION:
            loss_function = self.params.get("loss_function", "RMSE")
            Algo = CatBoostRegressor

        cat_params = {
            "iterations": self.params.get("num_boost_round", self.rounds),
            "learning_rate": self.params.get("learning_rate", 0.1),
            "depth": self.params.get("depth", 3),
            "rsm": self.params.get("rsm", 1.0),
            "l2_leaf_reg": self.params.get("l2_leaf_reg", 3.0),
            "random_strength": self.params.get("random_strength", 1.0),
            "loss_function": loss_function,
            "eval_metric": self.params.get("eval_metric", loss_function),
            # "custom_metric": self.params.get("eval_metric", loss_function),
            "thread_count": self.params.get("n_jobs", -1),
            "verbose": False,
            "allow_writing_files": False,
            "random_seed": self.params.get("seed", 1),
        }

        for extra_param in [
            "min_data_in_leaf",
            "bootstrap_type",
            "bagging_temperature",
            "subsample",
            "border_count",
        ]:
            if extra_param in self.params:
                cat_params[extra_param] = self.params[extra_param]

        self.log_metric_name = cat_params["eval_metric"]
        if cat_params["eval_metric"] == "spearman":
            cat_params["eval_metric"] = CatBoostEvalMetricSpearman()
            self.log_metric_name = "CatBoostEvalMetricSpearman"
        elif cat_params["eval_metric"] == "pearson":
            cat_params["eval_metric"] = CatBoostEvalMetricPearson()
            self.log_metric_name = "CatBoostEvalMetricPearson"
        elif cat_params["eval_metric"] == "average_precision":
            cat_params["eval_metric"] = CatBoostEvalMetricAveragePrecision()
            self.log_metric_name = "CatBoostEvalMetricAveragePrecision"
        elif cat_params["eval_metric"] == "mse":
            cat_params["eval_metric"] = CatBoostEvalMetricMSE()
            self.log_metric_name = "CatBoostEvalMetricMSE"
        elif cat_params["eval_metric"] == "user_defined_metric":
            cat_params["eval_metric"] = CatBoostEvalMetricUserDefined()
            self.log_metric_name = "CatBoostEvalMetricUserDefined"

        self.model = Algo(**cat_params)
        self.cat_features = None
        self.best_ntree_limit = 0

        logger.debug("CatBoostAlgorithm.__init__")

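    # Estimate how many boosting rounds fit into the time budget: train a short
    # warm-up model, measure the time per iteration, and scale the number of
    # rounds to max_time (clipped to the range [10, 10000]).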
    def _assess_iterations(self, X, y, sample_weight, eval_set, max_time=None):
        if max_time is None:
            max_time = 3600
        try:
            model = copy.deepcopy(self.model)
            model.set_params(iterations=self.warmup_iterations)
            start_time = time.time()
            model.fit(
                X,
                y,
                sample_weight=sample_weight,
                cat_features=self.cat_features,
                init_model=None if self.model.tree_count_ is None else self.model,
                eval_set=eval_set,
                early_stopping_rounds=self.early_stopping_rounds,
                verbose_eval=False,
            )
            elapsed_time = (time.time() - start_time) / float(self.warmup_iterations)
            # print(max_time, elapsed_time, max_time / elapsed_time, np.round(time.time() - start_time, 2))
            new_rounds = int(min(10000, max_time / elapsed_time))
            new_rounds = max(new_rounds, 10)
            return model, new_rounds
        except Exception as e:
            # print(str(e))
            return None, 1000

    def fit(
        self,
        X,
        y,
        sample_weight=None,
        X_validation=None,
        y_validation=None,
        sample_weight_validation=None,
        log_to_file=None,
        max_time=None,
    ):
        if self.is_fitted():
            print("CatBoost model already fitted. Skip fit().")
            return

        if self.cat_features is None:
            self.cat_features = []
            for i in range(X.shape[1]):
                if PreprocessingUtils.is_categorical(X.iloc[:, i]):
                    self.cat_features += [i]
                    col_name = X.columns[i]
                    X[col_name] = X[col_name].astype(str)
                    if X_validation is not None:
                        X_validation[col_name] = X_validation[col_name].astype(str)

        eval_set = None
        if X_validation is not None and y_validation is not None:
            eval_set = Pool(
                data=X_validation,
                label=y_validation,
                cat_features=self.cat_features,
                weight=sample_weight_validation,
            )

        if self.params.get("num_boost_round") is None:
            model_init, new_iterations = self._assess_iterations(
                X, y, sample_weight, eval_set, max_time
            )
            self.model.set_params(iterations=new_iterations)
        else:
            model_init = None
            self.model.set_params(iterations=self.params.get("num_boost_round"))
            self.early_stopping_rounds = self.params.get("early_stopping_rounds", 50)

        self.model.fit(
            X,
            y,
            sample_weight=sample_weight,
            cat_features=self.cat_features,
            init_model=model_init,
            eval_set=eval_set,
            early_stopping_rounds=self.early_stopping_rounds,
            verbose_eval=False,
        )

        if self.model.best_iteration_ is not None:
            if model_init is not None:
                self.best_ntree_limit = (
                    self.model.best_iteration_ + model_init.tree_count_ + 1
                )
            else:
                self.best_ntree_limit = self.model.best_iteration_ + 1

        else:
            # just take all the trees
            # the warm-up trees are already included
            # don't need to add +1
            self.best_ntree_limit = self.model.tree_count_

        if log_to_file is not None:
            train_scores = self.model.evals_result_["learn"].get(self.log_metric_name)
            validation_scores = self.model.evals_result_["validation"].get(
                self.log_metric_name
            )
            if model_init is not None:
                if train_scores is not None:
                    train_scores = (
                        model_init.evals_result_["learn"].get(self.log_metric_name)
                        + train_scores
                    )
                if validation_scores is not None:
                    validation_scores = (
                        model_init.evals_result_["validation"].get(self.log_metric_name)
                        + validation_scores
                    )
            iteration = None
            if train_scores is not None:
                iteration = range(len(train_scores))
            elif validation_scores is not None:
                iteration = range(len(validation_scores))

            result = pd.DataFrame(
                {
                    "iteration": iteration,
                    "train": train_scores,
                    "validation": validation_scores,
                }
            )
            result.to_csv(log_to_file, index=False, header=False)

        if self.params["ml_task"] != REGRESSION:
            self.classes_ = np.unique(y)

    def is_fitted(self):
        return self.model is not None and self.model.tree_count_ is not None

    def predict(self, X):
        self.reload()
        if self.params["ml_task"] == BINARY_CLASSIFICATION:
            return self.model.predict_proba(X, ntree_end=self.best_ntree_limit)[:, 1]
        elif self.params["ml_task"] == MULTICLASS_CLASSIFICATION:
            return self.model.predict_proba(X, ntree_end=self.best_ntree_limit)

        return self.model.predict(X, ntree_end=self.best_ntree_limit)

    def copy(self):
        return copy.deepcopy(self)

    def save(self, model_file_path):
        self.model.save_model(model_file_path)
        self.model_file_path = model_file_path
        logger.debug("CatBoostAlgorithm save model to %s" % model_file_path)

    def load(self, model_file_path):
        logger.debug("CatBoostLearner load model from %s" % model_file_path)

        # waiting for fix https://github.com/catboost/catboost/issues/696
        Algo = CatBoostClassifier
        if self.params["ml_task"] == REGRESSION:
            Algo = CatBoostRegressor

        # loading might throw warnings in the case of custom eval_metric
        # check https://github.com/catboost/catboost/issues/1169
        self.model = Algo().load_model(model_file_path)
        self.model_file_path = model_file_path

    def file_extension(self):
        return "catboost"

    def get_metric_name(self):
        metric = self.params.get("eval_metric")
        if metric is None:
            return None
        # map CatBoost metric names to internal metric names
        mapping = {
            "Logloss": "logloss",
            "AUC": "auc",
            "MultiClass": "logloss",
            "RMSE": "rmse",
            "MSE": "mse",
            "MAE": "mae",
            "MAPE": "mape",
            "F1": "f1",
            "TotalF1:average=Micro": "f1",
            "Accuracy": "accuracy",
        }
        return mapping.get(metric, metric)


classification_params = {
    "learning_rate": [0.025, 0.05, 0.1, 0.2],
    "depth": [4, 5, 6, 7, 8, 9],
    "rsm": [0.7, 0.8, 0.9, 1],  # random subspace method
    "loss_function": ["Logloss"],
}

classification_default_params = {
    "learning_rate": 0.1,
    "depth": 6,
    "rsm": 1,
    "loss_function": "Logloss",
}

additional = {
    "max_rounds": 10000,
    "early_stopping_rounds": 50,
    "max_rows_limit": None,
    "max_cols_limit": None,
}
required_preprocessing = [
    "missing_values_inputation",
    "datetime_transform",
    "text_transform",
    "target_as_integer",
]


class CBClassifier(ClassifierMixin, CatBoostAlgorithm):
    pass


AlgorithmsRegistry.add(
    BINARY_CLASSIFICATION,
    CBClassifier,
    classification_params,
    required_preprocessing,
    additional,
    classification_default_params,
)

multiclass_classification_params = copy.deepcopy(classification_params)
multiclass_classification_params["loss_function"] = ["MultiClass"]
multiclass_classification_params["depth"] = [3, 4, 5, 6]
multiclass_classification_params["learning_rate"] = [0.1, 0.15, 0.2]

multiclass_classification_default_params = copy.deepcopy(classification_default_params)
multiclass_classification_default_params["loss_function"] = "MultiClass"
multiclass_classification_default_params["depth"] = 5
multiclass_classification_default_params["learning_rate"] = 0.15


AlgorithmsRegistry.add(
    MULTICLASS_CLASSIFICATION,
    CBClassifier,
    multiclass_classification_params,
    required_preprocessing,
    additional,
    multiclass_classification_default_params,
)

regression_params = copy.deepcopy(classification_params)
regression_params["loss_function"] = ["RMSE", "MAE", "MAPE"]

regression_required_preprocessing = [
    "missing_values_inputation",
    "datetime_transform",
    "text_transform",
    "target_scale",
]


regression_default_params = {
    "learning_rate": 0.1,
    "depth": 6,
    "rsm": 1,
    "loss_function": "RMSE",
}


class CBRegressor(RegressorMixin, CatBoostAlgorithm):
    pass


AlgorithmsRegistry.add(
    REGRESSION,
    CBRegressor,
    regression_params,
    regression_required_preprocessing,
    additional,
    regression_default_params,
)

```
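
For orientation, here is a minimal sketch (not the library's actual tuner) of how one configuration could be drawn from the `classification_params` search space registered above; the extra keys (`seed`, `ml_task`, `eval_metric`) are assumptions based on how they are read in `fit()` and `get_metric_name()`.

```python
import random

# One random draw from the binary-classification search space defined above
# (assumes the module-level names classification_params and
# BINARY_CLASSIFICATION are in scope).
params = {name: random.choice(values) for name, values in classification_params.items()}

# Extra keys the wrapper reads elsewhere; the values here are illustrative only.
params.update({"seed": 1234, "ml_task": BINARY_CLASSIFICATION, "eval_metric": "Logloss"})
print(params)
```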

--------------------------------------------------------------------------------
/supervised/fairness/optimization.py:
--------------------------------------------------------------------------------

```python
import numpy as np


class FairnessOptimization:
    @staticmethod
    def binary_classification(
        target,
        predicted_labels,
        sensitive_features,
        fairness_metric,
        fairness_threshold,
        privileged_groups=[],
        underprivileged_groups=[],
        previous_fairness_optimization=None,
        min_selection_rate=None,
        max_selection_rate=None,
    ):
        target = np.array(target).ravel()
        preds = np.array(predicted_labels)

        # fairness optimization stats
        sensitive_values = {}
        for col in sensitive_features.columns:
            col_name = col[10:]  # strip the "sensitive_" prefix
            values = list(sensitive_features[col].unique())
            sensitive_values[col] = values

            for v in values:
                ii = sensitive_features[col] == v

            new_sensitive_values = {}
            for k, prev_values in sensitive_values.items():
                if k == col:
                    continue
                new_sensitive_values[f"{k}@{col}"] = []
                for v in values:
                    for pv in prev_values:
                        if isinstance(pv, tuple):
                            new_sensitive_values[f"{k}@{col}"] += [(*pv, v)]
                        else:
                            new_sensitive_values[f"{k}@{col}"] += [(pv, v)]

            sensitive_values = {**sensitive_values, **new_sensitive_values}

        # print(sensitive_values)

        sensitive_indices = {}
        for k, values_list in sensitive_values.items():
            if k.count("@") == sensitive_features.shape[1] - 1:
                # print(k)
                # print("values_list",values_list)
                cols = k.split("@")
                for values in values_list:
                    if not isinstance(values, tuple):
                        values = (values,)
                    # print("values", values)

                    ii = None
                    for i, c in enumerate(cols):
                        if ii is None:
                            ii = sensitive_features[c] == values[i]
                        else:
                            ii &= sensitive_features[c] == values[i]

                    key = "@".join([str(s) for s in values])
                    # print(key, np.sum(ii))
                    sensitive_indices[key] = ii

        total_dp_ratio = min_selection_rate / max_selection_rate
        # print("total dp ratio", total_dp_ratio)

        c0 = np.sum(target == 0)
        c1 = np.sum(target == 1)

        selection_rates = {}
        weights = {}

        for key, indices in sensitive_indices.items():
            selection_rates[key] = np.sum((preds == 1) & indices) / np.sum(indices)
            # print(key, np.sum(indices), selection_rates[key])

            t = np.sum(indices)
            t0 = np.sum(indices & (target == 0))
            t1 = np.sum(indices & (target == 1))

            w0 = t / target.shape[0] * c0 / t0
            w1 = t / target.shape[0] * c1 / t1

            # print("----", key, w0, w1, t, t0, t1)
            weights[key] = [w0, w1]
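            # w0/w1 follow the classic reweighing formula:
            #   w(group, label) = n_group * n_label / (N * n_group_label)
            # so (group, label) combinations that are under-represented relative
            # to independence receive weights above 1.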

        max_selection_rate = np.max(list(selection_rates.values()))
        min_selection_rate = np.min(list(selection_rates.values()))

        for k, v in selection_rates.items():
            selection_rates[k] = v / max_selection_rate

        # print("previous fairness optimization")
        # print(previous_fairness_optimization)
        # print("********")

        previous_weights = {}
        if previous_fairness_optimization is not None:
            weights = previous_fairness_optimization.get("weights")
            for key, indices in sensitive_indices.items():
                # print("Previous")
                # print(previous_fairness_optimization["selection_rates"][key], selection_rates[key])

                direction = 0.0
                if (
                    previous_fairness_optimization["selection_rates"][key]
                    < selection_rates[key]
                ):
                    # print("Improvement")
                    direction = 1.0
                elif selection_rates[key] > 0.8:
                    # print("GOOD")
                    direction = 0.0
                else:
                    # print("Decrease")
                    direction = -0.5

                # use the previous weights (default to 1.0 if not available)
                prev_weights = previous_fairness_optimization.get(
                    "previous_weights", {}
                ).get(key, [1, 1])
                # print("prev_weights", prev_weights)
                delta0 = weights[key][0] - prev_weights[0]
                delta1 = weights[key][1] - prev_weights[1]

                previous_weights[key] = [weights[key][0], weights[key][1]]

                # print("BEFORE")
                # print(weights[key])
                weights[key][0] += direction * delta0
                weights[key][1] += direction * delta1
                # print("AFTER")
                # print(weights[key])
                # print(previous_fairness_optimization["weights"][key])

        step = None
        if previous_fairness_optimization is not None:
            step = previous_fairness_optimization.get("step")

        if step is None:
            step = 0
        else:
            step += 1

        return {
            "selection_rates": selection_rates,
            "previous_weights": previous_weights,
            "weights": weights,
            "total_dp_ratio": total_dp_ratio,
            "step": step,
            "fairness_threshold": fairness_threshold,
        }

    @staticmethod
    def regression(
        target,
        predictions,
        sensitive_features,
        fairness_metric,
        fairness_threshold,
        privileged_groups=[],
        underprivileged_groups=[],
        previous_fairness_optimization=None,
        performance_metric=None,
        performance_metric_name=None,
    ):
        target = np.array(target).ravel()
        preds = np.array(predictions)

        # fairness optimization stats
        sensitive_values = {}
        for col in sensitive_features.columns:
            col_name = col[10:]  # strip the "sensitive_" prefix
            values = list(sensitive_features[col].unique())
            sensitive_values[col] = values

            for v in values:
                ii = sensitive_features[col] == v

            new_sensitive_values = {}
            for k, prev_values in sensitive_values.items():
                if k == col:
                    continue
                new_sensitive_values[f"{k}@{col}"] = []
                for v in values:
                    for pv in prev_values:
                        if isinstance(pv, tuple):
                            new_sensitive_values[f"{k}@{col}"] += [(*pv, v)]
                        else:
                            new_sensitive_values[f"{k}@{col}"] += [(pv, v)]

            sensitive_values = {**sensitive_values, **new_sensitive_values}

        sensitive_indices = {}
        least_frequent_key = None
        least_frequency = sensitive_features.shape[0]
        for k, values_list in sensitive_values.items():
            if k.count("@") == sensitive_features.shape[1] - 1:
                # print(k)
                # print("values_list",values_list)
                cols = k.split("@")
                for values in values_list:
                    if not isinstance(values, tuple):
                        values = (values,)
                    # print("values", values)

                    ii = None
                    for i, c in enumerate(cols):
                        if ii is None:
                            ii = sensitive_features[c] == values[i]
                        else:
                            ii &= sensitive_features[c] == values[i]

                    key = "@".join([str(s) for s in values])
                    if np.sum(ii) > 0:
                        sensitive_indices[key] = ii
                        if np.sum(ii) < least_frequency:
                            least_frequency = np.sum(ii)
                            least_frequent_key = key

        weights = {}
        performance = {}

        for key, indices in sensitive_indices.items():
            w = target.shape[0] / len(sensitive_indices) / np.sum(indices)
            weights[key] = w
            performance[key] = performance_metric(target[indices], preds[indices])
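            # Each group gets the same total weight mass:
            # w = N / (n_groups * n_group), so samples from smaller groups
            # are upweighted relative to larger ones.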

        # upweight the least frequent group a bit more
        weights[least_frequent_key] *= 1.5

        denominator = np.max(list(performance.values()))
        new_performance = {}
        for k, v in performance.items():
            new_performance[k] = np.round(v / denominator, 4)
        performance = new_performance

        previous_weights = {}
        if previous_fairness_optimization is not None:
            weights = previous_fairness_optimization.get("weights")
            for key, indices in sensitive_indices.items():
                direction = 0.0
                if (
                    previous_fairness_optimization["performance"][key]
                    < performance[key]
                ):
                    direction = 1.0
                elif performance[key] > fairness_threshold:
                    direction = 0.0
                else:
                    direction = -0.5

                # use the previous weights (default to 1.0 if not available)
                prev_weights = previous_fairness_optimization.get(
                    "previous_weights", {}
                ).get(key, 1)
                delta0 = weights[key] - prev_weights
                previous_weights[key] = weights[key]
                weights[key] = max(weights[key] + direction * delta0, 0.01)

        no_weights_change = False
        if str(previous_weights) == str(weights):
            no_weights_change = True

        step = None
        if previous_fairness_optimization is not None:
            step = previous_fairness_optimization.get("step")

        if step is None:
            step = 0
        else:
            if not no_weights_change:
                step += 1

        return {
            "performance": performance,
            "previous_weights": previous_weights,
            "weights": weights,
            "step": step,
            "fairness_threshold": fairness_threshold,
        }

    @staticmethod
    def multiclass_classification(
        target,
        predicted_labels,
        sensitive_features,
        fairness_metric,
        fairness_threshold,
        privileged_groups=[],
        underprivileged_groups=[],
        previous_fairness_optimization=None,
    ):
        target = np.array(target).ravel()
        preds = np.array(predicted_labels)
        target_values = list(np.unique(target))

        # fairness optimization stats
        sensitive_values = {}
        for col in sensitive_features.columns:
            col_name = col[10:]  # strip the "sensitive_" prefix
            values = list(sensitive_features[col].unique())
            sensitive_values[col] = values
            for v in values:
                ii = sensitive_features[col] == v
            new_sensitive_values = {}
            for k, prev_values in sensitive_values.items():
                if k == col:
                    continue
                new_sensitive_values[f"{k}@{col}"] = []
                for v in values:
                    for pv in prev_values:
                        if isinstance(pv, tuple):
                            new_sensitive_values[f"{k}@{col}"] += [(*pv, v)]
                        else:
                            new_sensitive_values[f"{k}@{col}"] += [(pv, v)]

            sensitive_values = {**sensitive_values, **new_sensitive_values}

        sensitive_indices = {}
        for k, values_list in sensitive_values.items():
            if k.count("@") == sensitive_features.shape[1] - 1:
                cols = k.split("@")
                for values in values_list:
                    if not isinstance(values, tuple):
                        values = (values,)

                    ii = None
                    for i, c in enumerate(cols):
                        if ii is None:
                            ii = sensitive_features[c] == values[i]
                        else:
                            ii &= sensitive_features[c] == values[i]

                    key = "@".join([str(s) for s in values])
                    sensitive_indices[key] = ii

        cs = {}
        for t in target_values:
            cs[t] = np.sum(target == t)
        selection_rates = {}
        weights = {}

        for key, indices in sensitive_indices.items():
            weights[key] = []
            sv = np.sum(indices)
            selection_rates[key] = {}
            for t in target_values:
                selection_rates[key][t] = np.sum((preds == t) & indices) / np.sum(
                    indices
                )

                t_k = np.sum(indices & (target == t))
                w_k = sv / target.shape[0] * cs[t] / t_k
                weights[key] += [w_k]

        for t in target_values:
            values = []
            for k, v in selection_rates.items():
                values += [v[t]]
            max_selection_rate = np.max(values)
            for k, v in selection_rates.items():
                v[t] /= max_selection_rate

        previous_weights = {}
        if previous_fairness_optimization is not None:
            weights = previous_fairness_optimization.get("weights")
            for key, indices in sensitive_indices.items():
                previous_weights[key] = [1] * len(target_values)
                for i, t in enumerate(target_values):
                    direction = 0.0
                    if (
                        previous_fairness_optimization["selection_rates"][key][t]
                        < selection_rates[key][t]
                    ):
                        direction = 1.0
                    elif selection_rates[key][t] > 0.8:
                        direction = 0.0
                    else:
                        direction = -0.5

                    # use the previous weights (default to 1.0 if not available)
                    prev_weights = previous_fairness_optimization.get(
                        "previous_weights", {}
                    ).get(key, [1] * len(target_values))

                    delta_i = weights[key][i] - prev_weights[i]

                    previous_weights[key][i] = weights[key][i]

                    weights[key][i] += direction * delta_i

        step = None
        if previous_fairness_optimization is not None:
            step = previous_fairness_optimization.get("step")

        if step is None:
            step = 0
        else:
            step += 1

        return {
            "selection_rates": selection_rates,
            "previous_weights": previous_weights,
            "weights": weights,
            "step": step,
            "fairness_threshold": fairness_threshold,
            "target_values": target_values,
        }

```
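
A minimal sketch (not part of the library) of how `FairnessOptimization.binary_classification` above can be exercised on toy data; the metric name and the selection-rate arguments are placeholders for values the surrounding AutoML pipeline would normally compute.

```python
import numpy as np
import pandas as pd

from supervised.fairness.optimization import FairnessOptimization

# Toy data: sensitive columns are expected to carry the "sensitive_" prefix.
target = np.array([1, 0, 1, 0, 1, 0, 1, 0])
preds = np.array([1, 0, 1, 1, 0, 0, 1, 0])
sensitive = pd.DataFrame({"sensitive_sex": ["F", "F", "F", "F", "M", "M", "M", "M"]})

result = FairnessOptimization.binary_classification(
    target,
    preds,
    sensitive,
    fairness_metric="demographic_parity_ratio",  # passed through; name is illustrative
    fairness_threshold=0.8,
    min_selection_rate=0.25,  # assumed to come from an earlier metrics step
    max_selection_rate=0.75,
)
print(result["selection_rates"], result["weights"], result["step"])
```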

--------------------------------------------------------------------------------
/supervised/utils/automl_plots.py:
--------------------------------------------------------------------------------

```python
import logging
import os
import traceback # For exception details

import numpy as np
import pandas as pd
import scipy as sp
from sklearn.preprocessing import MinMaxScaler

logger = logging.getLogger(__name__)
from supervised.utils.config import LOG_LEVEL
logger.setLevel(LOG_LEVEL)
# Add a handler if running standalone for testing
if not logger.hasHandlers():
    handler = logging.StreamHandler()
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)


import warnings
import matplotlib.pyplot as plt

warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)

class AutoMLPlots:
    # Original filename definitions
    features_heatmap_fname = "features_heatmap.png"
    correlation_heatmap_fname = "correlation_heatmap.png"
    # Filename for Scaled Plot
    features_heatmap_scaled_fname = "features_heatmap_scaled.png"

    @staticmethod
    def _plot_feature_heatmap(data_df, title, plot_path, cmap="Blues", vmin=None, vmax=None, cbar_label='Importance'):
        """ Helper method to generate and save a feature importance heatmap. """
        try:
            logger.info(f"Generating heatmap: '{title}'")
            # Adjust height dynamically based on number of features
            plot_height = max(7, len(data_df.index) * 0.35)
            fig, ax = plt.subplots(1, 1, figsize=(10, plot_height))

            image = ax.imshow(
                data_df,
                interpolation="nearest",
                cmap=cmap,  # imshow accepts a colormap name; plt.cm.get_cmap was removed in Matplotlib 3.9
                aspect="auto",
                vmin=vmin, # Use provided vmin
                vmax=vmax  # Use provided vmax
            )
            cbar = plt.colorbar(mappable=image)
            cbar.set_label(cbar_label) # Use provided label

            x_tick_marks = np.arange(len(data_df.columns))
            y_tick_marks = np.arange(len(data_df.index))
            ax.set_xticks(x_tick_marks)
            ax.set_xticklabels(data_df.columns, rotation=90)
            ax.set_yticks(y_tick_marks)
            ax.set_yticklabels(data_df.index)
            ax.set_title(title)

            plt.tight_layout(pad=2.0)
            plt.savefig(plot_path)
            logger.info(f"Saved heatmap to: {plot_path}")
            plt.close(fig) # Close the specific figure

        except Exception as e:
            logger.error(f"Failed to generate heatmap '{title}': {e}")
            logger.error(traceback.format_exc())
            plt.close("all") # Close any potentially open plots on error


    @staticmethod
    def add(results_path, models, fout):
        """
        Adds feature importance (original and MinMax-scaled) and model correlation plots to the report file stream.

        Args:
            results_path (str): Path to results directory.
            models (list): List of model objects.
            fout (file object): Writable file object for the report.
        """
        # Generate both feature importance plots
        AutoMLPlots.models_feature_importance(results_path, models)

        # --- Unscaled Feature Importance Section ---
        features_plot_path = os.path.join(
            results_path, AutoMLPlots.features_heatmap_fname # Use original filename
        )
        if os.path.exists(features_plot_path):
            fout.write("\n\n### Features Importance (Original Scale)\n") # Updated title
            fout.write(
                f"![features importance across models]({AutoMLPlots.features_heatmap_fname})\n\n" # Use original filename
            )
        else:
            logger.warning(f"Original feature importance plot not found at: {features_plot_path}")


        # --- Scaled Feature Importance Section ---
        features_scaled_plot_path = os.path.join(
            results_path, AutoMLPlots.features_heatmap_scaled_fname # Use scaled filename
        )
        if os.path.exists(features_scaled_plot_path):
            fout.write("\n\n### Scaled Features Importance (MinMax per Model)\n") # Title for scaled plot
            fout.write(
                f"![scaled features importance across models]({AutoMLPlots.features_heatmap_scaled_fname})\n\n" # Use scaled filename
            )
        else:
            logger.warning(f"Scaled feature importance plot not found at: {features_scaled_plot_path}")


        # --- Correlation Section (remains the same) ---
        AutoMLPlots.models_correlation(results_path, models)

        correlation_plot_path = os.path.join(
            results_path, AutoMLPlots.correlation_heatmap_fname
        )
        if os.path.exists(correlation_plot_path):
            fout.write("\n\n### Spearman Correlation of Models\n")
            fout.write(
                f"![models spearman correlation]({AutoMLPlots.correlation_heatmap_fname})\n\n"
            )
        else:
            logger.warning(f"Model correlation plot not found at: {correlation_plot_path}")


    @staticmethod
    def models_feature_importance(results_path, models):
        """
        Generates and saves BOTH original and scaled feature importance heatmaps.
        """
        logger.info("Starting feature importance generation (original and scaled).")
        try:
            # --- Data Aggregation (Common part) ---
            model_feature_imp = {}
            for m in models:
                model_name = m.get_name()
                model_path = os.path.join(results_path, model_name)
                logger.debug(f"Processing model '{model_name}' in '{model_path}'")
                if not os.path.isdir(model_path):
                    logger.warning(f"Directory not found for model '{model_name}'. Skipping.")
                    continue
                try:
                    all_files = os.listdir(model_path)
                except OSError as e:
                    logger.error(f"Cannot list directory {model_path}: {e}. Skipping model '{model_name}'.")
                    continue
                imp_data = [f for f in all_files if "_importance.csv" in f and "shap" not in f]
                if not imp_data:
                    logger.warning(f"No suitable importance files found for model '{model_name}'. Skipping.")
                    continue
                df_all = []
                for fname in imp_data:
                    file_path = os.path.join(model_path, fname)
                    try:
                        df = pd.read_csv(file_path, index_col=0)
                        numeric_df = df.select_dtypes(include=np.number)
                        if numeric_df.empty or numeric_df.isnull().all().all():
                            logger.warning(f"File {fname} (model '{model_name}') contains no valid numeric data. Skipping.")
                            continue
                        df_all.append(df)
                    except Exception as read_e:
                        logger.error(f"Error reading/processing file {fname} (model '{model_name}'): {read_e}. Skipping.")
                        continue
                if not df_all:
                    logger.warning(f"No valid importance dataframes read for model '{model_name}'. Skipping.")
                    continue
                try:
                    df_concat = pd.concat(df_all, axis=1, join='outer')
                    numeric_df_concat = df_concat.select_dtypes(include=np.number)
                    if not numeric_df_concat.empty:
                         model_feature_imp[model_name] = numeric_df_concat.mean(axis=1).fillna(0)
                    else:
                         logger.warning(f"No numeric data after concat for model '{model_name}'. Skipping.")
                except Exception as concat_e:
                     logger.error(f"Error aggregating importance for model '{model_name}': {concat_e}")
                     continue

            logger.info(f"Collected feature importance for {len(model_feature_imp)} models.")
            if len(model_feature_imp) < 2:
                logger.warning("Feature importance heatmaps require at least 2 models with data. Skipping plot generation.")
                return

            mfi = pd.concat(model_feature_imp, axis=1, join='outer').fillna(0)
            logger.debug(f"Combined importance DataFrame shape: {mfi.shape}")

            # --- Sorting & Top N (Common part) ---
            mfi["m"] = mfi.mean(axis=1)
            mfi_sorted = mfi.sort_values(by="m", ascending=False)
            mfi_sorted = mfi_sorted.drop("m", axis=1) # Keep original mfi for potential later use if needed

            num_features_original = mfi_sorted.shape[0]
            mfi_plot_data = mfi_sorted # Default to using all sorted features
            title_suffix = "Feature Importance"
            scaled_title_suffix = "Scaled Feature Importance (MinMax per model)"

            if num_features_original > 25:
                mfi_plot_data = mfi_sorted.head(25)
                title_suffix = f"Top-25 ({num_features_original} total) Feature Importance"
                scaled_title_suffix = f"Top-25 ({num_features_original} total) Scaled Feature Importance (MinMax per model)"
                logger.info(f"Selecting top 25 features out of {num_features_original} for plotting.")
            else:
                 logger.info(f"Using all {num_features_original} features for plotting.")


            # --- Plotting Unscaled Version ---
            unscaled_plot_path = os.path.join(results_path, AutoMLPlots.features_heatmap_fname)
            AutoMLPlots._plot_feature_heatmap(
                data_df=mfi_plot_data,
                title=title_suffix + " (Original Scale)",
                plot_path=unscaled_plot_path,
                cbar_label='Importance'
                # vmin/vmax are auto-detected by default
            )

            # --- Scaling Data ---
            logger.debug("Applying Min-Max scaling for the second plot.")
            scaler = MinMaxScaler()
            mfi_scaled_array = scaler.fit_transform(mfi_plot_data) # Scale the potentially filtered data
            mfi_scaled = pd.DataFrame(mfi_scaled_array, index=mfi_plot_data.index, columns=mfi_plot_data.columns)

            # --- Plotting Scaled Version ---
            scaled_plot_path = os.path.join(results_path, AutoMLPlots.features_heatmap_scaled_fname)
            AutoMLPlots._plot_feature_heatmap(
                data_df=mfi_scaled,
                title=scaled_title_suffix,
                plot_path=scaled_plot_path,
                vmin=0, # Explicit range for scaled data
                vmax=1,
                cbar_label='Scaled Importance (MinMax per model)'
            )

            logger.info("Finished generating feature importance plots.")

        except Exception as e:
            logger.error(f"An error occurred during feature importance processing: {e}")
            logger.error(traceback.format_exc())
            plt.close("all") # Ensure plots are closed on unexpected error


    @staticmethod
    def correlation(oof1, oof2):
        """ Calculates mean Spearman correlation between prediction columns """
        cols = [c for c in oof1.columns if "prediction" in c]
        # Check if prediction columns exist
        if not cols or not all(c in oof2.columns for c in cols):
            logger.warning("Prediction columns mismatch or not found for correlation calculation.")
            return np.nan # Return NaN if predictions can't be compared

        with warnings.catch_warnings():
            warnings.simplefilter(action="ignore")
            v = []
            for c in cols:
                try:
                    # Calculate Spearman correlation, ignore p-value
                    corr_val, _ = sp.stats.spearmanr(oof1[c], oof2[c])
                    # Handle potential NaN result from spearmanr if input variance is zero
                    if not np.isnan(corr_val):
                        v.append(corr_val)
                    else:
                         logger.debug(f"NaN result from spearmanr for column {c}. Skipping.")
                except Exception as corr_e:
                    logger.warning(f"Could not calculate Spearman correlation for column {c}: {corr_e}")

        # Return mean correlation, or NaN if no valid correlations were calculated
        return np.mean(v) if v else np.nan


    @staticmethod
    def models_correlation(results_path, models):
        """ Generates and saves model prediction correlation heatmap """
        logger.info("Starting model correlation heatmap generation.")
        try:
            if len(models) < 2:
                logger.warning("Model correlation heatmap requires at least 2 models. Skipping.")
                return

            names = []
            oofs = []
            valid_models_indices = [] # Keep track of models with valid OOF data

            for i, m in enumerate(models):
                try:
                    oof_data = m.get_out_of_folds()
                    # Basic validation of OOF data
                    if oof_data is None or oof_data.empty or not any("prediction" in c for c in oof_data.columns):
                        logger.warning(f"Model '{m.get_name()}' has invalid or missing out-of-folds prediction data. Excluding from correlation.")
                        continue

                    names.append(m.get_name())
                    oofs.append(oof_data)
                    valid_models_indices.append(i) # Store original index if valid
                    logger.debug(f"Got valid OOF data for model '{m.get_name()}'.")

                except AttributeError:
                     logger.warning(f"Model '{m.get_name()}' seems to be missing 'get_out_of_folds' method or it failed. Excluding from correlation.")
                     continue
                except Exception as oof_e:
                     logger.warning(f"Failed to get OOF data for model '{m.get_name()}': {oof_e}. Excluding from correlation.")
                     continue


            num_valid_models = len(names)
            if num_valid_models < 2:
                logger.warning(f"Fewer than 2 models ({num_valid_models}) have valid OOF data for correlation. Skipping plot generation.")
                return

            logger.info(f"Calculating correlations for {num_valid_models} models.")
            corrs = np.ones((num_valid_models, num_valid_models)) # Use num_valid_models dimension
            for i in range(num_valid_models):
                for j in range(i + 1, num_valid_models):
                    correlation_value = AutoMLPlots.correlation(oofs[i], oofs[j])
                    # Fill with NaN if correlation calculation failed
                    corrs[i, j] = corrs[j, i] = correlation_value if not np.isnan(correlation_value) else np.nan


            # Check if all correlations are NaN
            if np.isnan(corrs[np.triu_indices(num_valid_models, k=1)]).all():
                 logger.warning("All pairwise model correlations resulted in NaN. Cannot generate heatmap.")
                 return


            logger.info("Generating model correlation heatmap.")
            figsize = (15, 15) if num_valid_models > 15 else (10, 10) # Adjusted threshold
            fig, ax = plt.subplots(1, 1, figsize=figsize)

            image = ax.imshow(
                corrs,
                interpolation="nearest",
                cmap=plt.cm.get_cmap("Blues"),
                aspect="auto",
                vmin=np.nanmin(corrs), # Use nanmin/nanmax to handle potential NaNs
                vmax=np.nanmax(corrs)
            )
            plt.colorbar(mappable=image)

            x_tick_marks = np.arange(num_valid_models)
            y_tick_marks = np.arange(num_valid_models)
            ax.set_xticks(x_tick_marks)
            ax.set_xticklabels(names, rotation=90)
            ax.set_yticks(y_tick_marks)
            ax.set_yticklabels(names)
            ax.set_title("Spearman Correlation of Models' OOF Predictions") # Slightly more descriptive title

            plt.tight_layout(pad=2.0)

            # --- Saving the Plot ---
            os.makedirs(results_path, exist_ok=True) # Ensure directory exists
            plot_path = os.path.join(
                results_path, AutoMLPlots.correlation_heatmap_fname
            )
            plt.savefig(plot_path)
            logger.info(f"Saved model correlation heatmap to: {plot_path}")
            plt.close("all") # Close plot to free memory

        except Exception as e:
            # Log the exception with traceback
            logger.error(f"An error occurred during model correlation plotting: {e}")
            logger.error(traceback.format_exc())
            # Ensure plot is closed if error occurred during saving/closing
            plt.close("all")



```
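
To preview the heatmap helper in isolation, the sketch below (again, not part of the library) feeds `AutoMLPlots._plot_feature_heatmap` a hand-made importance table; the model and feature names are invented.

```python
import pandas as pd

from supervised.utils.automl_plots import AutoMLPlots

# Hand-made importance table: rows are features, columns are model names.
imp = pd.DataFrame(
    {"1_Baseline": [0.10, 0.05, 0.01], "2_DecisionTree": [0.60, 0.30, 0.10]},
    index=["feature_a", "feature_b", "feature_c"],
)

AutoMLPlots._plot_feature_heatmap(
    data_df=imp,
    title="Feature Importance (toy data)",
    plot_path="features_heatmap_toy.png",
)
```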
Page 3/16