Source code for ensemblem.utils

import numpy as np


[docs]def split_sets(df, train_size, val_size, test_size, target): """ Split the data into train, validation and test sets with target and features Parameters ---------- :param df: pandas.DataFrame to be divided :param train_size: float, size of the train set :param val_size: float, size of the neighbours-set :param test_size: float, size of the test set :return: train, validation and test sets with target and features """ train, val, test = divide_sets(df, train_size, val_size, test_size) return ( train.drop(columns=[target]), train[target], val.drop(columns=[target]), val[target], test.drop(columns=[target]), test[target], )
[docs]def divide_sets(df, train_size, val_size, test_size): """ Divide the data into train, validation and test sets Parameters ---------- :param df: pandas.DataFrame to be divided :param train_size: float, size of the train set :param val_size: float, size of the neighbours-set :param test_size: float, size of the test set :return: train, validation and test sets """ train_size = int(train_size * len(df)) val_size = int(val_size * len(df)) test_size = int(test_size * len(df)) return ( df[:train_size], df[train_size:train_size + val_size], df[train_size+val_size:train_size+val_size+test_size], )