Source code for rankfm.evaluation

"""
rankfm model tuning and evaluation functions
"""

import numpy as np
import pandas as pd
from rankfm.utils import get_data

def hit_rate(model, test_interactions, k=10, filter_previous=False):
    """evaluate hit-rate (any match) wrt out-of-sample observed interactions

    :param model: trained RankFM model instance
    :param test_interactions: pandas dataframe of out-of-sample observed user/item interactions
    :param k: number of recommendations to generate for each user
    :param filter_previous: remove observed training items from generated recommendations
    :return: the hit rate or proportion of test users with any matching items
    :raises AssertionError: if `model.is_fit` is falsy
    """

    # ensure that the model has been fit before attempting to generate predictions
    # NOTE: raised explicitly (not via `assert`) so the guard survives `python -O`;
    # the exception type and message are unchanged for existing callers
    if not model.is_fit:
        raise AssertionError("you must fit the model prior to evaluating hold-out metrics")

    # transform interactions into a user -> items dictionary
    test_user_items = pd.DataFrame(get_data(test_interactions), columns=['user_id', 'item_id'])
    test_user_items = test_user_items.groupby('user_id')['item_id'].apply(set).to_dict()
    test_users = list(test_user_items.keys())

    # generate topK recommendations for the test users; `cold_start='drop'` is forwarded to the model,
    # so only the users present in the returned index are scored
    test_recs = model.recommend(users=test_users, n_items=k, filter_previous=filter_previous, cold_start='drop')
    comm_user = test_recs.index.values

    # a user counts as a hit (1) when at least one recommended item is in their test items, else 0
    user_hits = [int(len(set(test_recs.loc[u]) & test_user_items[u]) > 0) for u in comm_user]

    # the hit rate is the average of the per-user hit flags wrt common users
    return np.mean(user_hits)
def reciprocal_rank(model, test_interactions, k=10, filter_previous=False):
    """evaluate reciprocal rank wrt out-of-sample observed interactions

    :param model: trained RankFM model instance
    :param test_interactions: pandas dataframe of out-of-sample observed user/item interactions
    :param k: number of recommendations to generate for each user
    :param filter_previous: remove observed training items from generated recommendations
    :return: mean reciprocal rank wrt the test users
    :raises AssertionError: if `model.is_fit` is falsy
    """

    # ensure that the model has been fit before attempting to generate predictions
    # NOTE: raised explicitly (not via `assert`) so the guard survives `python -O`;
    # the exception type and message are unchanged for existing callers
    if not model.is_fit:
        raise AssertionError("you must fit the model prior to evaluating hold-out metrics")

    # transform interactions into a user -> items dictionary
    test_user_items = pd.DataFrame(get_data(test_interactions), columns=['user_id', 'item_id'])
    test_user_items = test_user_items.groupby('user_id')['item_id'].apply(set).to_dict()
    test_users = list(test_user_items.keys())

    # generate topK recommendations for the test users; `cold_start='drop'` is forwarded to the model,
    # so only the users present in the returned index are scored
    test_recs = model.recommend(users=test_users, n_items=k, filter_previous=filter_previous, cold_start='drop')
    comm_user = test_recs.index.values

    # collect the 0-based positions of the relevant items within each user's recommendation row
    match_indexes = []
    for u in comm_user:
        user_recs = test_recs.loc[u]
        relevant = set(user_recs) & test_user_items[u]
        match_indexes.append(np.where(user_recs.isin(relevant))[0])

    # reciprocal rank is the inverse 1-based rank of the first relevant item, or 0 when there is no match
    user_scores = [1 / (np.min(index) + 1) if len(index) > 0 else 0 for index in match_indexes]
    return np.mean(user_scores)
def discounted_cumulative_gain(model, test_interactions, k=10, filter_previous=False):
    """evaluate discounted cumulative gain wrt out-of-sample observed interactions

    :param model: trained RankFM model instance
    :param test_interactions: pandas dataframe of out-of-sample observed user/item interactions
    :param k: number of recommendations to generate for each user
    :param filter_previous: remove observed training items from generated recommendations
    :return: mean discounted cumulative gain wrt the test users
    :raises AssertionError: if `model.is_fit` is falsy
    """

    # ensure that the model has been fit before attempting to generate predictions
    # NOTE: raised explicitly (not via `assert`) so the guard survives `python -O`;
    # the exception type and message are unchanged for existing callers
    if not model.is_fit:
        raise AssertionError("you must fit the model prior to evaluating hold-out metrics")

    # transform interactions into a user -> items dictionary
    test_user_items = pd.DataFrame(get_data(test_interactions), columns=['user_id', 'item_id'])
    test_user_items = test_user_items.groupby('user_id')['item_id'].apply(set).to_dict()
    test_users = list(test_user_items.keys())

    # generate topK recommendations for the test users; `cold_start='drop'` is forwarded to the model,
    # so only the users present in the returned index are scored
    test_recs = model.recommend(users=test_users, n_items=k, filter_previous=filter_previous, cold_start='drop')
    comm_user = test_recs.index.values

    # collect the 0-based positions of the relevant items within each user's recommendation row
    match_indexes = []
    for u in comm_user:
        user_recs = test_recs.loc[u]
        relevant = set(user_recs) & test_user_items[u]
        match_indexes.append(np.where(user_recs.isin(relevant))[0])

    # each relevant item at 0-based position i contributes 1 / log2(i + 2); users with no match score 0
    user_gains = [np.sum(1 / np.log2(index + 2)) if len(index) > 0 else 0 for index in match_indexes]
    return np.mean(user_gains)
def precision(model, test_interactions, k=10, filter_previous=False):
    """evaluate precision wrt out-of-sample observed interactions

    :param model: trained RankFM model instance
    :param test_interactions: pandas dataframe of out-of-sample observed user/item interactions
    :param k: number of recommendations to generate for each user
    :param filter_previous: remove observed training items from generated recommendations
    :return: mean precision wrt the test users
    :raises AssertionError: if `model.is_fit` is falsy
    """

    # ensure that the model has been fit before attempting to generate predictions
    # NOTE: raised explicitly (not via `assert`) so the guard survives `python -O`;
    # the exception type and message are unchanged for existing callers
    if not model.is_fit:
        raise AssertionError("you must fit the model prior to evaluating hold-out metrics")

    # transform interactions into a user -> items dictionary
    test_user_items = pd.DataFrame(get_data(test_interactions), columns=['user_id', 'item_id'])
    test_user_items = test_user_items.groupby('user_id')['item_id'].apply(set).to_dict()
    test_users = list(test_user_items.keys())

    # generate topK recommendations for the test users; `cold_start='drop'` is forwarded to the model,
    # so only the users present in the returned index are scored
    test_recs = model.recommend(users=test_users, n_items=k, filter_previous=filter_previous, cold_start='drop')
    comm_user = test_recs.index.values

    # per-user precision: distinct relevant recommended items divided by the length of the recommendation row
    user_precisions = []
    for u in comm_user:
        user_recs = test_recs.loc[u]
        n_relevant = len(set(user_recs) & test_user_items[u])
        user_precisions.append(n_relevant / len(user_recs))

    # calculate average precision wrt common users
    return np.mean(user_precisions)
def recall(model, test_interactions, k=10, filter_previous=False):
    """evaluate recall wrt out-of-sample observed interactions

    :param model: trained RankFM model instance
    :param test_interactions: pandas dataframe of out-of-sample observed user/item interactions
    :param k: number of recommendations to generate for each user
    :param filter_previous: remove observed training items from generated recommendations
    :return: mean recall wrt the test users
    :raises AssertionError: if `model.is_fit` is falsy
    """

    # ensure that the model has been fit before attempting to generate predictions
    # NOTE: raised explicitly (not via `assert`) so the guard survives `python -O`;
    # the exception type and message are unchanged for existing callers
    if not model.is_fit:
        raise AssertionError("you must fit the model prior to evaluating hold-out metrics")

    # transform interactions into a user -> items dictionary
    test_user_items = pd.DataFrame(get_data(test_interactions), columns=['user_id', 'item_id'])
    test_user_items = test_user_items.groupby('user_id')['item_id'].apply(set).to_dict()
    test_users = list(test_user_items.keys())

    # generate topK recommendations for the test users; `cold_start='drop'` is forwarded to the model,
    # so only the users present in the returned index are scored
    test_recs = model.recommend(users=test_users, n_items=k, filter_previous=filter_previous, cold_start='drop')
    comm_user = test_recs.index.values

    # per-user recall: distinct relevant recommended items divided by the number of the user's test items
    user_recalls = []
    for u in comm_user:
        observed_items = test_user_items[u]
        n_relevant = len(set(test_recs.loc[u]) & observed_items)
        user_recalls.append(n_relevant / len(observed_items))

    # calculate average recall wrt common users
    return np.mean(user_recalls)
def diversity(model, test_interactions, k=10, filter_previous=False):
    """evaluate the diversity of the model recommendations

    :param model: trained RankFM model instance
    :param test_interactions: pandas dataframe of out-of-sample observed user/item interactions
    :param k: number of recommendations to generate for each user
    :param filter_previous: remove observed training items from generated recommendations
    :return: dataframe of cnt/pct of users recommended for each item
    :raises AssertionError: if `model.is_fit` is falsy
    """

    # ensure that the model has been fit before attempting to generate predictions
    # NOTE: raised explicitly (not via `assert`) so the guard survives `python -O`;
    # the exception type and message are unchanged for existing callers
    if not model.is_fit:
        raise AssertionError("you must fit the model prior to evaluating hold-out metrics")

    # get the unique set of test users
    test_user_items = pd.DataFrame(get_data(test_interactions), columns=['user_id', 'item_id'])
    test_users = test_user_items['user_id'].unique()

    # generate topK recommendations for the test users; `cold_start='drop'` is forwarded to the model,
    # so only the users present in the returned index are counted
    test_recs = model.recommend(users=test_users, n_items=k, filter_previous=filter_previous, cold_start='drop')
    comm_user = test_recs.index.values

    # stack the recommendations long-format for aggregation, discarding the rank-position column
    test_recs = test_recs.stack().reset_index().drop('level_1', axis=1)
    test_recs.columns = ['user_id', 'item_id']

    # count the users getting recommended each unique item
    user_counts = test_recs.groupby('item_id')['user_id'].count().to_frame('cnt_users')

    # include every item in `model.item_id` (zero count when never recommended), most-recommended first
    user_counts = user_counts.reindex(model.item_id.values, fill_value=0)
    user_counts = user_counts.sort_values('cnt_users', ascending=False).reset_index()

    # express the counts as a share of the users that received recommendations
    user_counts['pct_users'] = user_counts['cnt_users'] / len(comm_user)
    return user_counts