[Answered ]-Building user-based collaborative filtering system in Django

1👍

You can boost efficiency a lot with:

def find_similar_users(user, k=5):
    """Return the ``k`` users whose purchases are most similar to ``user``'s.

    Every other user is scored against ``user`` with ``jaccard_similarity``
    and the highest-scoring ones are returned.

    Args:
        user: The user to find neighbours for; excluded from the candidates.
        k: Maximum number of users to return.

    Returns:
        A list of at most ``k`` users, ordered from most to least similar.
        Empty when ``k`` is not positive.
    """
    if k <= 0:
        return []

    # prefetch_related loads the purchases of all candidate users in one
    # additional query instead of issuing one query per candidate.
    all_users = User.objects.exclude(id=user.id).prefetch_related('purchase_set')
    similarities = [
        (other_user, jaccard_similarity(user, other_user))
        for other_user in all_users
    ]
    # list.sort is stable, so candidates with equal scores keep the order
    # in which the queryset yielded them.
    similarities.sort(key=lambda pair: pair[1], reverse=True)
    return [other_user for other_user, _score in similarities[:k]]


def jaccard_similarity(user1, user2) -> float:
    """Return the Jaccard similarity of two users' purchased products.

    The similarity is the number of product ids both users purchased divided
    by the number of product ids either of them purchased.

    Args:
        user1: Object whose ``purchase_set.all()`` yields purchases that
            expose a ``product_id`` attribute.
        user2: Same kind of object as ``user1``.

    Returns:
        A value between 0.0 (no product in common) and 1.0 (identical
        product sets). Returns 0.0 when neither user has any purchase.
    """
    user1_purchases = {
        purchase.product_id for purchase in user1.purchase_set.all()
    }
    # This set must be bound to its own name: reusing ``user1_purchases``
    # here would discard the first user's products and leave
    # ``user2_purchases`` undefined.
    user2_purchases = {
        purchase.product_id for purchase in user2.purchase_set.all()
    }

    union = user1_purchases | user2_purchases
    if not union:
        # Neither user bought anything; avoid dividing by zero.
        return 0.0

    intersection = user1_purchases & user2_purchases
    return len(intersection) / len(union)

This retrieves the purchases of all the other users in bulk, so the users and their purchases are loaded with only two queries instead of one query per user, which is probably where the bottleneck is anyway.

Leave a comment