import pandas as pd
import numpy as np
def euclidean(point, data):
    """
    Euclidean distance from a single point to every row of a data set.

    The differences ``data - point`` are squared, summed along axis 1 and
    square-rooted, so ``data`` must have at least two dimensions.

    :param point: reference point, subtracted from ``data``
    :param data: collection of points; presumably one point per row
    :return: the distances obtained by reducing over axis 1
    """
    offsets = data - point
    squared_offsets = offsets ** 2
    # Collapse the coordinate axis so one value remains per row.
    row_totals = np.sum(squared_offsets, axis=1)
    return np.sqrt(row_totals)
def euclidean_v(x, y):
    """
    Euclidean distance between two vectors.

    Computed as the square root of the sum of all squared element-wise
    differences; the sum runs over every element, yielding a single value.

    :param x: first vector
    :param y: second vector
    :return: distance between ``x`` and ``y``
    """
    delta = x - y
    sum_of_squares = np.sum(delta ** 2)
    return np.sqrt(sum_of_squares)
def manhattan_v(x, y):
    """
    Manhattan distance between two vectors.

    Computed as the sum of the absolute element-wise differences.

    :param x: first vector
    :param y: second vector
    :return: distance between ``x`` and ``y``
    """
    absolute_gaps = np.abs(x - y)
    return np.sum(absolute_gaps)
def cosine_v(x, y):
    """
    Cosine distance between two vectors, defined as ``1 - cosine similarity``.

    The similarity is the dot product of the vectors divided by the product
    of their norms. No guard is applied for zero-length vectors.

    :param x: first vector
    :param y: second vector
    :return: cosine distance between ``x`` and ``y``
    """
    inner_product = np.dot(x, y)
    norm_product = np.linalg.norm(x) * np.linalg.norm(y)
    similarity = inner_product / norm_product
    return 1 - similarity
def mean_absolute_percentage_error(actual, predicted):
    """
    Mean Absolute Percentage Error (MAPE).

    Averages ``|actual - predicted| / |actual|`` over all elements. The
    result is a fraction (it is not multiplied by 100), and zeros in
    ``actual`` are not guarded against.

    :param actual: actual values
    :param predicted: predicted values
    :return: mean of the absolute relative errors
    """
    relative_errors = (actual - predicted) / actual
    return np.mean(np.abs(relative_errors))
def mean_absolute_error(actual, predicted):
    """
    Mean Absolute Error (MAE).

    ``actual`` is transposed before the subtraction and the absolute errors
    are transposed back before averaging, so ``actual`` must expose ``.T``
    (e.g. a NumPy array).

    :param actual: actual values
    :param predicted: predicted values
    :return: mean of the absolute errors over all elements
    """
    residuals = predicted - actual.T
    absolute_residuals = np.abs(residuals).T
    return np.mean(absolute_residuals)
def root_mean_squared_error(actual, predicted) -> float:
    """
    Root Mean Squared Error (RMSE).

    ``actual`` is transposed before the subtraction and the errors are
    transposed back afterwards, so ``actual`` must expose ``.T`` (e.g. a
    NumPy array).

    :param actual: actual values
    :param predicted: predicted values
    :return: square root of the mean of the squared errors
    """
    errors = (predicted - actual.T).T
    # Square each error BEFORE averaging; squaring the mean instead would let
    # errors of opposite sign cancel and yield |mean error| rather than RMSE.
    return np.sqrt(np.mean(errors ** 2))
def mean_squared_error(actual, predicted) -> float:
    """
    Mean Squared Error (MSE).

    ``actual`` is transposed before the subtraction and the errors are
    transposed back afterwards, so ``actual`` must expose ``.T`` (e.g. a
    NumPy array).

    :param actual: actual values
    :param predicted: predicted values
    :return: mean of the squared errors
    """
    errors = (predicted - actual.T).T
    # Square each error BEFORE averaging; squaring the mean instead would let
    # errors of opposite sign cancel and yield (mean error)^2 rather than MSE.
    return np.mean(errors ** 2)
def root_mean_squared_log_error(actual, predicted) -> float:
    """
    Root Mean Squared Log Error (RMSLE).

    Both inputs are shifted by one and passed through the natural logarithm;
    the result is the square root of the mean squared difference of those
    logarithms.

    :param actual: actual values
    :param predicted: predicted values
    :return: RMSLE
    """
    log_predicted = np.log(predicted + 1)
    log_actual = np.log(actual + 1)
    squared_log_gaps = np.square(log_predicted - log_actual)
    return np.sqrt(np.mean(squared_log_gaps))
def metrics_table(actual, predicted, model_name) -> pd.DataFrame:
    """
    Build a one-row table holding the error metrics of a single model.

    The row contains the model name followed by the MAPE, MAE, RMSE and
    RMSLE computed from ``actual`` and ``predicted``.

    :param actual: actual values
    :param predicted: predicted values
    :param model_name: name of the model, stored in the "Model" column
    :return: table with results
    """
    header = ["Model", "MAPE", "MAE", "RMSE", "RMSLE"]
    # Values are listed in the same order as the column header above.
    row = [
        model_name,
        mean_absolute_percentage_error(actual, predicted),
        mean_absolute_error(actual, predicted),
        root_mean_squared_error(actual, predicted),
        root_mean_squared_log_error(actual, predicted),
    ]
    return pd.DataFrame(columns=header, data=[row])