# Source code for featurehub.modeling.scorers

import numpy as np
from sklearn.metrics import mean_squared_error
import sklearn.metrics

#
# Normalized Discounted Cumulative Gain metric
#
def ndcg_score(y_true, y_pred, k=5):
    """Normalized discounted cumulative gain (NDCG) at rank k.

    This specific score function operates under the assumption that the
    relevance for the correct label is 1 and the relevance for all other labels
    is 0. Under that assumption the ideal DCG is 1, so each sample's NDCG is
    simply its DCG: ``1 / log2(rank + 2)`` where ``rank`` is the 0-based
    position of the true label among the top-k predictions, or 0 when the true
    label is not among them.

    Parameters
    ----------
    y_true : array-like, shape = [n_samples,]
        Ground truth (true relevance labels). These must be encoded to integer
        values, by using LabelEncoder, for example.

    y_pred : array-like, shape = [n_samples, n_classes]
        Probability predictions for each class.

    k : int
        Rank.

    Returns
    -------
    NDCG @k : float
        Mean of the per-sample scores.
    """
    # Coerce so that plain lists (and other array-likes) support the
    # broadcasting/indexing used below, as the documented contract promises.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Class indices ordered by decreasing predicted score, truncated to k.
    # (Ascending argsort followed by a flip keeps the original tie ordering.)
    top_k = np.fliplr(np.argsort(y_pred))[:, :k]

    # hits[i, j] is True when the j-th ranked class of sample i is the truth.
    hits = top_k == y_true[:, None]
    found = hits.any(axis=1)
    # argmax yields 0 for all-False rows; those rows are masked out by `found`.
    rank = hits.argmax(axis=1)

    gains = np.where(found, 1.0 / np.log2(rank + 2), 0.0)
    return np.mean(gains)

# scikit-learn scorer built from ``ndcg_score``: higher values are better and
# the metric is fed class-probability predictions rather than hard labels.
# NOTE(review): ``needs_proba`` is deprecated in recent scikit-learn releases
# in favor of ``response_method`` -- confirm against the pinned version.
ndcg_scorer = sklearn.metrics.make_scorer(
    ndcg_score,
    greater_is_better=True,
    needs_proba=True,
)

#
# Root mean squared log error
#

def rmsle_score(y_true, y_pred, **kwargs):
    """Root mean squared logarithmic error (RMSLE).

    Computes the square root of the mean squared error between
    ``log(1 + y_true)`` and ``log(1 + y_pred)``.

    Parameters
    ----------
    y_true : array-like
        Ground truth target values. Values are expected to be greater than -1
        so that the logarithm is defined.

    y_pred : array-like
        Predicted target values, with the same expectation as ``y_true``.

    **kwargs
        Forwarded unchanged to ``mean_squared_error``.

    Returns
    -------
    RMSLE : float
        Square root of whatever ``mean_squared_error`` returns for the
        log-transformed inputs.
    """
    # asarray lets plain lists through; log1p is the numerically stable form
    # of log(x + 1) for values close to zero.
    log_true = np.log1p(np.asarray(y_true))
    log_pred = np.log1p(np.asarray(y_pred))
    return np.sqrt(mean_squared_error(log_true, log_pred, **kwargs))

# scikit-learn scorer built from ``rmsle_score``. RMSLE is an error, so lower
# is better (``greater_is_better=False`` makes the scorer sign-flip the value).
# The scorer relies on make_scorer's default of using plain predictions; no
# probability-related flag is passed, which keeps the call valid on
# scikit-learn releases that no longer accept such flags.
rmsle_scorer = sklearn.metrics.make_scorer(
    rmsle_score,
    greater_is_better=False,
)