Source code for ensemblem.model

import numpy as np
import pandas
from .weights_functions import *
from .metrics import *
from sklearn.preprocessing import MinMaxScaler
from typing import List


class KWEnsembler:
    """
    K-Weighted Ensembler.

    Ensemble model that combines the predictions of several base models
    for a sample by looking at the ``k`` nearest neighbors of that sample
    in a reference ("neighbors") set. For every base-model prediction
    column a weight is computed by a weight function from the neighbors'
    target values and the neighbors' predictions in that column; the final
    prediction is the weighted average of the sample's own base-model
    predictions. Optionally, a per-column bias estimated on the neighbors
    is removed from each base-model prediction before averaging.

    :param k: number of neighbors to use
    :param bias: whether to apply the neighbors' bias correction
    :param dist_metric: callable invoked as
        ``dist_metric(test_sample, similar_space)``; its result must be
        iterable and expose an ``index`` attribute (one distance per row
        of the search space)
    """

    def __init__(self, k: int = 5, bias: bool = False, dist_metric=euclidean):
        self.k = k
        self.bias = bias
        self.dist_metric = dist_metric

    def fit(
        self,
        X_neighbors: pandas.DataFrame,
        y_neighbors: pandas.DataFrame,
        features: List,
        range_min: int = 0,
        range_max: int = 1,
    ) -> None:
        """
        Fits the ensemble by creating the (min-max scaled) search space.

        :param X_neighbors: Neighbors search space; must contain the
            ``features`` columns and the prediction columns later passed
            to :meth:`predict`
        :param y_neighbors: Target values of the neighbors search space,
            looked up with the same index labels as ``X_neighbors``
        :param features: Columns used to measure similarity; these are
            the columns that get scaled
        :param range_min: Lower bound of the min-max scaling range
        :param range_max: Upper bound of the min-max scaling range
        """
        # Work on a private copy: scaling must not overwrite the caller's frame.
        self.X_neighbors = X_neighbors.copy()
        self.y_neighbors = y_neighbors
        self.x_scaler = MinMaxScaler(feature_range=(range_min, range_max))
        self.X_neighbors[features] = self.x_scaler.fit_transform(
            self.X_neighbors[features]
        )

    def _find_similar_neighbors(
        self, test_sample: pandas.Series, similar_space: pandas.DataFrame
    ) -> List:
        """
        Finds the k nearest neighbors of ``test_sample`` in ``similar_space``.

        :param test_sample: Sample to find the neighbors of
        :param similar_space: Search space
        :return: Index labels of the k nearest neighbors, closest first
        """
        distances = self.dist_metric(test_sample, similar_space)
        # Sorting (distance, label) pairs orders by distance and breaks
        # ties by index label.
        ranked = sorted(zip(distances, distances.index))
        return [label for _, label in ranked[: self.k]]

    def predict(
        self,
        X_test: pandas.DataFrame,
        features: List,
        pred_columns: List,
        weight_function=w_inverse_LMAE,
    ) -> List:
        """
        Predicts the target values for the test set using the ensemble method.

        Must be called after :meth:`fit`. ``X_test`` is left unmodified.

        :param X_test: Test set; must contain ``features`` and
            ``pred_columns``
        :param features: Columns used to measure similarity (scaled with
            the scaler fitted in :meth:`fit`)
        :param pred_columns: Columns holding the base-model predictions to
            ensemble; they must also exist in the fitted neighbors set
        :param weight_function: Callable invoked as
            ``weight_function(target_values, predictions)`` on the
            neighbors; its result is used as the weight of that column
        :return: One ensembled prediction per row of ``X_test``
        """
        # Scale a copy: scaling in place would corrupt the caller's data and
        # make a second call on the same frame scale already-scaled values.
        scaled_test = X_test.copy()
        scaled_test[features] = self.x_scaler.transform(scaled_test[features])

        # Loop-invariant column selections.
        test_features = scaled_test[features]
        test_predictions = scaled_test[pred_columns]
        neighbor_features = self.X_neighbors[features]
        n_columns = len(pred_columns)

        predictions_ensembled = []
        for row in range(len(scaled_test)):
            weights = np.zeros(n_columns)
            biases = np.zeros(n_columns)
            neighbors = self._find_similar_neighbors(
                test_features.iloc[row], neighbor_features
            )
            # Both lookups depend only on the neighbors, not on the column.
            neighbor_rows = self.X_neighbors.loc[neighbors]
            target_val = self.y_neighbors.loc[neighbors]

            for idx, column in enumerate(pred_columns):
                preds_val = neighbor_rows[column]
                weights[idx] = weight_function(target_val, preds_val)
                if self.bias:
                    # Mean of (target - prediction) over the neighbors.
                    # NOTE(review): the built-in sum adds the values only if
                    # this expression is a Series; confirm y_neighbors is
                    # passed as a Series despite the DataFrame annotation.
                    biases[idx] = sum(
                        (target_val.T - preds_val) / len(target_val)
                    )

            # NOTE(review): biases hold mean(target - prediction) and are
            # SUBTRACTED here; confirm the sign is intended (kept as in the
            # original implementation).
            corrected = test_predictions.iloc[row] - biases
            predictions_ensembled.append(sum(corrected * weights) / sum(weights))
        return predictions_ensembled