# Source code for ensemblem.model

import numpy as np
import pandas
from .weights_functions import *
from .metrics import *
from sklearn.preprocessing import MinMaxScaler
from typing import List


class KWEnsembler:
    """
    K-Weighted Ensembler.

    Combines the predictions of several base models for a sample by
    weighting each model with a weight function evaluated on the sample's
    k nearest neighbors in a reference ("neighbors") set. Optionally a
    per-model bias, computed on the same neighbors, is applied to the base
    predictions before the weighted average is taken.

    :param k: number of neighbors to use
    :param bias: whether to apply the neighbors' bias to the predictions
    :param dist_metric: callable ``dist_metric(sample, search_space)``
        returning one distance per row of ``search_space``; the result must
        expose an ``index`` holding the row labels of ``search_space``
    """

    def __init__(self, k: int = 5, bias: bool = False, dist_metric=euclidean):
        self.k = k
        self.bias = bias
        self.dist_metric = dist_metric

    def fit(
        self,
        X_neighbors: pandas.DataFrame,
        y_neighbors: pandas.DataFrame,
        features: List,
        range_min: int = 0,
        range_max: int = 1,
    ) -> None:
        """
        Fits the ensemble by creating the (min-max scaled) search space.

        The caller's ``X_neighbors`` is left untouched: scaling is applied
        to an internal copy.

        :param X_neighbors: Neighbors search space. Must contain the
            ``features`` columns and, for ``predict``, the base-model
            prediction columns.
        :param y_neighbors: Target values of the search space, indexed like
            ``X_neighbors``
        :param features: Columns used to measure similarity between samples
        :param range_min: Minimum value of minmax scaling
        :param range_max: Maximum value of minmax scaling
        """
        # Copy first: assigning the scaled features into the caller's frame
        # would silently rescale their data (and double-scale on a refit).
        self.X_neighbors = X_neighbors.copy()
        self.y_neighbors = y_neighbors
        self.x_scaler = MinMaxScaler(feature_range=(range_min, range_max))
        self.X_neighbors[features] = self.x_scaler.fit_transform(
            self.X_neighbors[features]
        )

    def _find_similar_neighbors(
        self, test_sample: pandas.Series, similar_space: pandas.DataFrame
    ) -> List:
        """
        Finds the k nearest neighbors of ``test_sample`` in ``similar_space``.

        :param test_sample: Sample to find the neighbors of
        :param similar_space: Search space

        :return: Index labels of the k nearest neighbors, closest first
            (ties on distance are ordered by index label)
        """
        distances = self.dist_metric(test_sample, similar_space)
        # Sort (distance, label) pairs so the labels come out nearest-first.
        ranked = sorted(zip(distances, distances.index))
        return [label for _, label in ranked[: self.k]]

    def predict(
        self,
        X_test: pandas.DataFrame,
        features: List,
        pred_columns: List,
        weight_function=w_inverse_LMAE,
    ) -> List:
        """
        Predicts the target values for the test set using the ensemble method.

        ``X_test`` is not modified; the feature scaling learned in ``fit``
        is applied to an internal copy, so repeated calls on the same frame
        give the same result.

        :param X_test: Test set; must contain ``features`` and
            ``pred_columns``
        :param features: Features of the test set used for the neighbor
            search
        :param pred_columns: Columns holding the base-model predictions to
            ensemble; they must also exist in the fitted search space
        :param weight_function: Callable ``weight_function(targets, preds)``
            returning the weight of one base model on the neighbors

        :return: One ensembled prediction per row of ``X_test``
        """
        # Scale a copy: writing back into X_test would rescale the caller's
        # data and apply the scaling twice on a second predict() call.
        scaled_test = X_test.copy()
        scaled_test[features] = self.x_scaler.transform(scaled_test[features])

        # Loop-invariant column selections, hoisted out of the row loop.
        test_features = scaled_test[features]
        neighbor_features = self.X_neighbors[features]
        base_predictions = scaled_test[pred_columns]
        n_models = len(pred_columns)

        predictions_ensembled = []
        for i in range(len(scaled_test)):
            _neighbors = self._find_similar_neighbors(
                test_features.iloc[i], neighbor_features
            )
            # Neighbor rows/targets are the same for every base model.
            neighbor_rows = self.X_neighbors.loc[_neighbors]
            target_val = self.y_neighbors.loc[_neighbors]

            _weights = np.zeros(n_models)
            _biases = np.zeros(n_models)
            for idx, column in enumerate(pred_columns):
                preds_val = neighbor_rows[column]
                _weights[idx] = weight_function(target_val, preds_val)
                if self.bias:
                    # NOTE(review): the builtin sum yields the mean residual
                    # only when target_val iterates over values (Series-like
                    # y_neighbors) - TODO confirm the expected type.
                    # NOTE(review): the bias is mean(target - prediction) and
                    # is subtracted below - confirm the sign is intended.
                    _biases[idx] = sum((target_val.T - preds_val) / len(target_val))

            corrected = base_predictions.iloc[i] - _biases
            # Weighted average of the (bias-corrected) base predictions.
            predictions_ensembled.append(sum(corrected * _weights) / sum(_weights))

        return predictions_ensembled