1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
def cal_user_sim(item_click_by_user, user_click_time):
    """
    Compute time-weighted user-to-user similarity from item click data.

    Two users co-occur whenever both appear in the click list of the same
    item. Each co-occurrence contributes
    ``update_two_contribution_score(time_i, time_j)`` to the pair, and the
    accumulated score is normalised by
    ``sqrt(click_count[user_i] * click_count[user_j])``.

    :param item_click_by_user: dict, key itemid, value: [userid1, userid2, ...]
    :param user_click_time: dict, key ``userid + "_" + itemid``, value: click
        time passed to ``update_two_contribution_score``; a missing key is
        treated as click time 0
    :return: dict, key userid, value: list of ``(other_userid, sim_score)``
        tuples sorted by sim_score in descending order
    """
    co_appear = {}
    user_click_count = {}
    for itemid, user_list in item_click_by_user.items():
        # Resolve every user's click time on this item once, instead of
        # repeating the lookup for each pair the user takes part in.
        click_times = [
            user_click_time.get(user + "_" + itemid, 0) for user in user_list
        ]
        for index_i, user_i in enumerate(user_list):
            user_click_count[user_i] = user_click_count.get(user_i, 0) + 1
            click_time_one = click_times[index_i]
            for index_j in range(index_i + 1, len(user_list)):
                user_j = user_list[index_j]
                click_time_two = click_times[index_j]
                score = update_two_contribution_score(
                    click_time_one, click_time_two
                )

                # Record the contribution in both directions so that the
                # similarity is symmetric: sim(i, j) == sim(j, i).
                scores_i = co_appear.setdefault(user_i, {})
                scores_i[user_j] = scores_i.get(user_j, 0) + score

                scores_j = co_appear.setdefault(user_j, {})
                scores_j[user_i] = scores_j.get(user_i, 0) + score

    user_sim_info_sorted = {}
    for user_i, relate_user in co_appear.items():
        # Normalise by the geometric mean of both users' click counts so
        # that very active users do not dominate every ranking.
        user_sim_info = {
            user_j: cotime
            / math.sqrt(user_click_count[user_i] * user_click_count[user_j])
            for user_j, cotime in relate_user.items()
        }
        user_sim_info_sorted[user_i] = sorted(
            user_sim_info.items(), key=operator.itemgetter(1), reverse=True
        )
    return user_sim_info_sorted
|