From 7d810f9b689b6ef6522746d5bf9b2dec8f6f9b5e Mon Sep 17 00:00:00 2001 From: Saliou Diallo Date: Wed, 21 Sep 2022 13:11:23 -0400 Subject: [PATCH 1/3] Update filter logic for trending premium content --- .../src/queries/get_trending_tracks.py | 67 ++++++++++--------- .../src/queries/get_underground_trending.py | 33 ++++----- 2 files changed, 52 insertions(+), 48 deletions(-) diff --git a/discovery-provider/src/queries/get_trending_tracks.py b/discovery-provider/src/queries/get_trending_tracks.py index 0f78e6f1462..5a5162aba7c 100644 --- a/discovery-provider/src/queries/get_trending_tracks.py +++ b/discovery-provider/src/queries/get_trending_tracks.py @@ -2,6 +2,7 @@ from sqlalchemy import desc from sqlalchemy.orm.session import Session +from src.models.tracks.track import Track from src.models.tracks.track_trending_score import TrackTrendingScore from src.premium_content.premium_content_constants import ( SHOULD_TRENDING_EXCLUDE_PREMIUM_TRACKS, @@ -48,30 +49,30 @@ def generate_unpopulated_trending( strategy.get_track_score(time_range, track) for track in trending_tracks["listen_counts"] ] + + # If exclude_premium is true, then filter out track ids + # belonging to premium tracks before applying the limit. + if exclude_premium: + ids = [track["track_id"] for track in track_scores] + non_premium_track_ids = ( + session.query(Track.track_id) + .filter(Track.track_id.in_(ids), Track.is_premium == False) + .all() + ) + non_premium_track_id_set = set(map(lambda t: t[0], non_premium_track_ids)) + track_scores = list( + filter(lambda t: t["track_id"] in non_premium_track_id_set, track_scores) + ) + sorted_track_scores = sorted( track_scores, key=lambda k: (k["score"], k["track_id"]), reverse=True ) - - # Re apply the limit just in case we did decide to include more tracks in the scoring than the limit - # Only limit the number of sorted tracks here if we are not later - # filtering out the premium tracks. Otherwise, the number of - # tracks we return later may be smaller than the limit. - # If we don't limit it here, we limit it later after getting the - # unpopulated tracks. - should_apply_limit_early = True # not exclude_premium - if should_apply_limit_early: - sorted_track_scores = sorted_track_scores[:limit] + sorted_track_scores = sorted_track_scores[:limit] # Get unpopulated metadata track_ids = [track["track_id"] for track in sorted_track_scores] tracks = get_unpopulated_tracks(session, track_ids, exclude_premium=exclude_premium) - # Make sure to apply the limit if not previously applied - # because of the filtering out of premium tracks - if not should_apply_limit_early: - tracks = tracks[:limit] - track_ids = [track["track_id"] for track in tracks] - return (tracks, track_ids) @@ -103,13 +104,23 @@ def generate_unpopulated_trending_from_mat_views( TrackTrendingScore.genre == genre ) - # Only limit the number of sorted tracks here if we are not later - # filtering out the premium tracks. Otherwise, the number of - # tracks we return later may be smaller than the limit. - # If we don't limit it here, we limit it later after getting the - # unpopulated tracks. - should_apply_limit_early = True # not exclude_premium - if should_apply_limit_early: + # If exclude_premium is true, then filter out track ids belonging to + # premium tracks before applying the limit. + if exclude_premium: + trending_track_ids = ( + trending_track_ids_query.join( + Track, Track.track_id == trending_track_ids_query.c.track_id + ) + .filter( + Track.is_current == True, + Track.is_delete == False, + Track.is_premium == False, + ) + .order_by(desc(TrackTrendingScore.score), desc(TrackTrendingScore.track_id)) + .limit(limit) + .all() + ) + else: trending_track_ids = ( trending_track_ids_query.order_by( desc(TrackTrendingScore.score), desc(TrackTrendingScore.track_id) @@ -117,21 +128,11 @@ def generate_unpopulated_trending_from_mat_views( .limit(limit) .all() ) - else: - trending_track_ids = trending_track_ids_query.order_by( - desc(TrackTrendingScore.score), desc(TrackTrendingScore.track_id) - ).all() # Get unpopulated metadata track_ids = [track_id[0] for track_id in trending_track_ids] tracks = get_unpopulated_tracks(session, track_ids, exclude_premium=exclude_premium) - # Make sure to apply the limit if not previously applied - # because of the filtering out of premium tracks - if not should_apply_limit_early: - tracks = tracks[:limit] - track_ids = [track["track_id"] for track in tracks] - return (tracks, track_ids) diff --git a/discovery-provider/src/queries/get_underground_trending.py b/discovery-provider/src/queries/get_underground_trending.py index f5f48c01434..c9d1aa3995b 100644 --- a/discovery-provider/src/queries/get_underground_trending.py +++ b/discovery-provider/src/queries/get_underground_trending.py @@ -195,16 +195,25 @@ def wrapped(): scored_tracks = [ strategy.get_track_score("week", track) for track in track_scoring_data ] - sorted_tracks = sorted(scored_tracks, key=lambda k: k["score"], reverse=True) - # Only limit the number of sorted tracks here if we are not later - # filtering out the premium tracks. Otherwise, the number of - # tracks we return later may be smaller than the limit. - # If we don't limit it here, we limit it later after getting the - # unpopulated tracks. - should_apply_limit_early = True # not SHOULD_TRENDING_EXCLUDE_PREMIUM_TRACKS - if should_apply_limit_early: - sorted_tracks = sorted_tracks[:UNDERGROUND_TRENDING_LENGTH] + # If SHOULD_TRENDING_EXCLUDE_PREMIUM_TRACKS is true, then filter out track ids + # belonging to premium tracks before applying the limit. + if SHOULD_TRENDING_EXCLUDE_PREMIUM_TRACKS: + ids = [track["track_id"] for track in scored_tracks] + non_premium_track_ids = ( + session.query(Track.track_id) + .filter(Track.track_id.in_(ids), Track.is_premium == False) + .all() + ) + non_premium_track_id_set = set(map(lambda t: t[0], non_premium_track_ids)) + scored_tracks = list( + filter( + lambda t: t["track_id"] in non_premium_track_id_set, scored_tracks + ) + ) + + sorted_tracks = sorted(scored_tracks, key=lambda k: k["score"], reverse=True) + sorted_tracks = sorted_tracks[:UNDERGROUND_TRENDING_LENGTH] # Get unpopulated metadata track_ids = [track["track_id"] for track in sorted_tracks] @@ -214,12 +223,6 @@ def wrapped(): exclude_premium=SHOULD_TRENDING_EXCLUDE_PREMIUM_TRACKS, ) - # Make sure to apply the limit if not previously applied - # because of the filtering out of premium tracks - if not should_apply_limit_early: - tracks = tracks[:UNDERGROUND_TRENDING_LENGTH] - track_ids = [track["track_id"] for track in tracks] - return (tracks, track_ids) return wrapped From 5709dfaae47a39712e0e372c135bcc04bdfca729 Mon Sep 17 00:00:00 2001 From: Saliou Diallo Date: Thu, 22 Sep 2022 16:22:38 -0400 Subject: [PATCH 2/3] Reduce db round trip --- .../src/queries/get_underground_trending.py | 22 +++++++------------ 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/discovery-provider/src/queries/get_underground_trending.py b/discovery-provider/src/queries/get_underground_trending.py index c9d1aa3995b..cfe5fc9bebd 100644 --- a/discovery-provider/src/queries/get_underground_trending.py +++ b/discovery-provider/src/queries/get_underground_trending.py @@ -64,6 +64,7 @@ def get_scorable_track_data(session, redis_instance, strategy): "karma": number "listens": number "owner_verified": boolean + "is_premium": boolean } """ @@ -108,6 +109,7 @@ def get_scorable_track_data(session, redis_instance, strategy): AggregatePlay.count, Track.created_at, follower_query.c.is_verified, + Track.is_premium, ) .join(Track, Track.track_id == AggregatePlay.play_item_id) .join(follower_query, follower_query.c.user_id == Track.owner_id) @@ -139,6 +141,7 @@ def get_scorable_track_data(session, redis_instance, strategy): "karma": 1, "listens": record[3], "owner_verified": record[5], + "is_premium": record[6], } for record in base_query } @@ -192,26 +195,17 @@ def make_get_unpopulated_tracks(session, redis_instance, strategy): def wrapped(): # Score and sort track_scoring_data = get_scorable_track_data(session, redis_instance, strategy) - scored_tracks = [ - strategy.get_track_score("week", track) for track in track_scoring_data - ] # If SHOULD_TRENDING_EXCLUDE_PREMIUM_TRACKS is true, then filter out track ids # belonging to premium tracks before applying the limit. if SHOULD_TRENDING_EXCLUDE_PREMIUM_TRACKS: - ids = [track["track_id"] for track in scored_tracks] - non_premium_track_ids = ( - session.query(Track.track_id) - .filter(Track.track_id.in_(ids), Track.is_premium == False) - .all() - ) - non_premium_track_id_set = set(map(lambda t: t[0], non_premium_track_ids)) - scored_tracks = list( - filter( - lambda t: t["track_id"] in non_premium_track_id_set, scored_tracks - ) + track_scoring_data = list( + filter(lambda item: not item["is_premium"], track_scoring_data) ) + scored_tracks = [ + strategy.get_track_score("week", track) for track in track_scoring_data + ] sorted_tracks = sorted(scored_tracks, key=lambda k: k["score"], reverse=True) sorted_tracks = sorted_tracks[:UNDERGROUND_TRENDING_LENGTH] From 78508401e2ee459ce63c8a48a7b09abcc0eb396e Mon Sep 17 00:00:00 2001 From: Saliou Diallo Date: Thu, 22 Sep 2022 23:51:05 -0400 Subject: [PATCH 3/3] Fix queries --- .../src/queries/get_trending_tracks.py | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/discovery-provider/src/queries/get_trending_tracks.py b/discovery-provider/src/queries/get_trending_tracks.py index 5a5162aba7c..82ac3351327 100644 --- a/discovery-provider/src/queries/get_trending_tracks.py +++ b/discovery-provider/src/queries/get_trending_tracks.py @@ -56,7 +56,12 @@ def generate_unpopulated_trending( ids = [track["track_id"] for track in track_scores] non_premium_track_ids = ( session.query(Track.track_id) - .filter(Track.track_id.in_(ids), Track.is_premium == False) + .filter( + Track.track_id.in_(ids), + Track.is_current == True, + Track.is_delete == False, + Track.is_premium == False, + ) .all() ) non_premium_track_id_set = set(map(lambda t: t[0], non_premium_track_ids)) @@ -107,16 +112,26 @@ def generate_unpopulated_trending_from_mat_views( # If exclude_premium is true, then filter out track ids belonging to # premium tracks before applying the limit. if exclude_premium: + trending_track_ids_subquery = trending_track_ids_query.subquery() trending_track_ids = ( - trending_track_ids_query.join( - Track, Track.track_id == trending_track_ids_query.c.track_id + session.query( + trending_track_ids_subquery.c.track_id, + trending_track_ids_subquery.c.score, + Track.track_id, + ) + .join( + trending_track_ids_subquery, + Track.track_id == trending_track_ids_subquery.c.track_id, ) .filter( Track.is_current == True, Track.is_delete == False, Track.is_premium == False, ) - .order_by(desc(TrackTrendingScore.score), desc(TrackTrendingScore.track_id)) + .order_by( + desc(trending_track_ids_subquery.c.score), + desc(trending_track_ids_subquery.c.track_id), + ) .limit(limit) .all() )