Source code for ensemblem.metrics

import pandas as pd
import numpy as np


[docs]def euclidean(point, data):
    """
    Euclidean distance is the square root of the sum of the squared differences of their coordinates

    :param point: point to calculate the distance
    :param data: data to calculate the distance

    :return: distance
    """
    return np.sqrt(np.sum((data - point) ** 2, axis=1))


[docs]def euclidean_v(x, y):
    """
    Vector Euclidean distance is the square root of the sum of the squared differences of their coordinates

    :param x: point to calculate the distance
    :param y: data to calculate the distance

    :return: distance
    """
    return np.sqrt(np.sum((x - y) ** 2))


[docs]def manhattan_v(x, y):
    """
    Vector Manhattan distance is the sum of the absolute differences of their coordinates

    :param x: point to calculate the distance
    :param y: data to calculate the distance
    """
    return np.sum(np.abs(x - y))


[docs]def cosine_v(x, y):
    """
    Vector Cosine distance is 1 - cosine similarity

    :param x: point to calculate the distance
    :param y: data to calculate the distance
    """
    return 1 - np.dot(x, y) / (np.linalg.norm(x) * np.linalg.norm(y))


[docs]def mean_absolute_percentage_error(actual, predicted):
    """
    Local Mean Absolute Percentage Error (LMAPE)

    :param actual: actual values
    :param predicted: predicted values
    """
    return np.mean(np.abs((actual - predicted) / actual))


[docs]def mean_absolute_error(actual, predicted):
    """
    Mean Absolute Error (MAE)

    :param actual: actual values
    :param predicted: predicted values

    """
    return np.mean(np.abs(predicted - actual.T).T)


[docs]def root_mean_squared_error(actual, predicted) -> float:
    """
    Local Root Mean Squared Error (LRMSE)
    """
    return np.sqrt(np.mean((predicted - actual.T).T) ** 2)


[docs]def mean_squared_error(actual, predicted) -> float:
    """
    Mean Squared Error (MSE)
    """
    return np.mean((predicted - actual.T).T) ** 2


[docs]def root_mean_squared_log_error(actual, predicted) -> float:
    """
    Local Root Mean Squared Log Error (LRMSLE)
    :param actual: actual values
    :param predicted: predicted values
    :return: RMSLE
    """
    return np.sqrt(np.mean(np.square(np.log(predicted + 1) - np.log(actual + 1))))


[docs]def metrics_table(actual, predicted, model_name) -> pd.DataFrame:
    """
    Create a table with pivot with results of multiple models and metrics

    :param actual: actual values
    :param predicted: predicted values
    :param model_name: name of the model

    :return: table with results
    """
    metrics = pd.DataFrame(
        columns=["Model", "MAPE", "MAE", "RMSE", "RMSLE"],
        data=[
            [
                model_name,
                mean_absolute_percentage_error(actual, predicted),
                mean_absolute_error(actual, predicted),
                root_mean_squared_error(actual, predicted),
                root_mean_squared_log_error(actual, predicted),
            ]
        ],
    )

    return metrics  # .to_markdown()