From b61d60f53f3494cfc34f652739b23b788534643a Mon Sep 17 00:00:00 2001 From: Theo Ilie Date: Tue, 10 May 2022 22:50:42 +0000 Subject: [PATCH 1/4] Paginate v1/full/users/content_node route --- discovery-provider/src/api/v1/users.py | 60 +++++++++++++++---- .../src/queries/get_users_cnode.py | 12 +++- 2 files changed, 58 insertions(+), 14 deletions(-) diff --git a/discovery-provider/src/api/v1/users.py b/discovery-provider/src/api/v1/users.py index e5291082c18..84bed4e5e9c 100644 --- a/discovery-provider/src/api/v1/users.py +++ b/discovery-provider/src/api/v1/users.py @@ -828,7 +828,28 @@ def get(self, id): argument_class=DescriptiveArgument ) users_by_content_node_route_parser.add_argument( - "creator_node_endpoint", required=True, type=str + "creator_node_endpoint", + required=False, + type=str, + description="DEPRECATED -- use content_node_endpoint", +) +users_by_content_node_route_parser.add_argument( + "content_node_endpoint", + required=False, + type=str, + description="Get users who have this Content Node endpoint as their primary/secondary", +) +users_by_content_node_route_parser.add_argument( + "prev_user_id", + required=False, + type=int, + description="Minimum user_id to return. Used for pagination as the offset after sorting in ascending order by user_id", +) +users_by_content_node_route_parser.add_argument( + "max_users", + required=False, + type=int, + description="Maximum number of users to return (SQL LIMIT)", ) users_by_content_node_response = make_full_response( "users_by_content_node", full_ns, fields.List(fields.Nested(user_replica_set)) @@ -837,26 +858,41 @@ def get(self, id): @full_ns.route("/content_node/", doc=False) class UsersByContentNode(Resource): - @full_ns.marshal_with(users_by_content_node_response) - # @cache(ttl_sec=30) - def get(self, replica_type): - """New route to call get_users_cnode with replica_type param (only consumed by content node) - - Leaving `/users/creator_node` above untouched for backwards-compatibility - + @ns.doc( + id="""Get Users By Replica Type for Content Node""", + description=""" + (Only consumed by Content Node) Gets users that have a given Content Node endpoint as + their primary, secondary, or either (depending on the replica_type passed). Response = array of objects of schema { user_id, wallet, primary, secondary1, secondary2, primarySpId, secondary1SpID, secondary2SpID } - """ + """, + responses={200: "Success", 400: "Bad request", 500: "Server error"}, + ) + @full_ns.marshal_with(users_by_content_node_response) + # @cache(ttl_sec=30) + def get(self, replica_type): args = users_by_content_node_route_parser.parse_args() - cnode_url = args.get("creator_node_endpoint") + # Endpoint that a user's primary/secondary/either must be set to for them to be included in the results + cnode_url = args.get("content_node_endpoint") or args.get( + "creator_node_endpoint" + ) + # Offset after sorting user_id in ascending order (using > comparison in SQL query) + prev_user_id = args.get("prev_user_id") or 0 + # LIMIT used in SQL query + max_users = args.get("max_users") or 100000 if replica_type == "primary": - users = get_users_cnode(cnode_url, ReplicaType.PRIMARY) + users = get_users_cnode( + cnode_url, ReplicaType.PRIMARY, prev_user_id, max_users + ) elif replica_type == "secondary": - users = get_users_cnode(cnode_url, ReplicaType.SECONDARY) + users = get_users_cnode( + cnode_url, ReplicaType.SECONDARY, prev_user_id, max_users + ) else: - users = get_users_cnode(cnode_url, ReplicaType.ALL) + users = get_users_cnode(cnode_url, ReplicaType.ALL, prev_user_id, max_users) return success_response(users) diff --git a/discovery-provider/src/queries/get_users_cnode.py b/discovery-provider/src/queries/get_users_cnode.py index eb74f4a6951..a2dbec51918 100644 --- a/discovery-provider/src/queries/get_users_cnode.py +++ b/discovery-provider/src/queries/get_users_cnode.py @@ -13,7 +13,12 @@ class ReplicaType(Enum): ALL = 3 -def get_users_cnode(cnode_endpoint_string, replica_type=ReplicaType.PRIMARY): +def get_users_cnode( + cnode_endpoint_string, + replica_type=ReplicaType.PRIMARY, + prev_user_id=0, + max_users=100000, +): """ Query all users with `cnode_endpoint_string` in replica set If replica_type=ReplicaType.PRIMARY -> returns users with `cnode_endpoint_string` as primary @@ -67,7 +72,10 @@ def get_users_cnode(cnode_endpoint_string, replica_type=ReplicaType.PRIMARY): 't.secondary1 = :cnode_endpoint_string OR ' 't.secondary2 = :cnode_endpoint_string) AND' } - t.secondary1 is not NULL; + t.user_id > { prev_user_id } AND + t.secondary1 is not NULL + ORDER BY "user_id" + LIMIT { max_users }; """ ) users = session.execute( From 7307092b9cd0ee62f68c47818980f297072d7fe1 Mon Sep 17 00:00:00 2001 From: Theo Ilie Date: Wed, 11 May 2022 16:10:44 +0000 Subject: [PATCH 2/4] Sid's comments --- discovery-provider/src/api/v1/users.py | 14 +++------ .../src/queries/get_users_cnode.py | 29 ++++++++++++++----- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/discovery-provider/src/api/v1/users.py b/discovery-provider/src/api/v1/users.py index 84bed4e5e9c..9a9438821b4 100644 --- a/discovery-provider/src/api/v1/users.py +++ b/discovery-provider/src/api/v1/users.py @@ -829,13 +829,7 @@ def get(self, id): ) users_by_content_node_route_parser.add_argument( "creator_node_endpoint", - required=False, - type=str, - description="DEPRECATED -- use content_node_endpoint", -) -users_by_content_node_route_parser.add_argument( - "content_node_endpoint", - required=False, + required=True, type=str, description="Get users who have this Content Node endpoint as their primary/secondary", ) @@ -870,7 +864,7 @@ class UsersByContentNode(Resource): responses={200: "Success", 400: "Bad request", 500: "Server error"}, ) @full_ns.marshal_with(users_by_content_node_response) - # @cache(ttl_sec=30) + @cache(ttl_sec=30) def get(self, replica_type): args = users_by_content_node_route_parser.parse_args() @@ -879,9 +873,9 @@ def get(self, replica_type): "creator_node_endpoint" ) # Offset after sorting user_id in ascending order (using > comparison in SQL query) - prev_user_id = args.get("prev_user_id") or 0 + prev_user_id = args.get("prev_user_id") # LIMIT used in SQL query - max_users = args.get("max_users") or 100000 + max_users = args.get("max_users") if replica_type == "primary": users = get_users_cnode( diff --git a/discovery-provider/src/queries/get_users_cnode.py b/discovery-provider/src/queries/get_users_cnode.py index a2dbec51918..e8c21c44103 100644 --- a/discovery-provider/src/queries/get_users_cnode.py +++ b/discovery-provider/src/queries/get_users_cnode.py @@ -13,11 +13,12 @@ class ReplicaType(Enum): ALL = 3 +# TODO: Enforce default max_users after all CNs are updated to a version with this PR: https://github.com/AudiusProject/audius-protocol/pull/3064 def get_users_cnode( cnode_endpoint_string, - replica_type=ReplicaType.PRIMARY, - prev_user_id=0, - max_users=100000, + replica_type, + prev_user_id, + max_users, ): """ Query all users with `cnode_endpoint_string` in replica set @@ -72,14 +73,28 @@ def get_users_cnode( 't.secondary1 = :cnode_endpoint_string OR ' 't.secondary2 = :cnode_endpoint_string) AND' } - t.user_id > { prev_user_id } AND t.secondary1 is not NULL - ORDER BY "user_id" - LIMIT { max_users }; + { + "" + if prev_user_id is None + else + "AND t.user_id > :prev_user_id" + } + { + "" + if max_users is None + else + "LIMIT :max_users" + } + ; """ ) users = session.execute( - users_res, {"cnode_endpoint_string": cnode_endpoint_string} + users_res, { + "cnode_endpoint_string": cnode_endpoint_string, + "prev_user_id": prev_user_id, + "max_users": max_users + } ).fetchall() users_dict = [dict(row) for row in users] return users_dict From 1bcf233e34e5ede5c2d1813a620c8032842e3379 Mon Sep 17 00:00:00 2001 From: Theo Ilie Date: Wed, 11 May 2022 16:12:05 +0000 Subject: [PATCH 3/4] Sid's comments --- discovery-provider/src/queries/get_users_cnode.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/discovery-provider/src/queries/get_users_cnode.py b/discovery-provider/src/queries/get_users_cnode.py index e8c21c44103..a97bed0bea5 100644 --- a/discovery-provider/src/queries/get_users_cnode.py +++ b/discovery-provider/src/queries/get_users_cnode.py @@ -90,11 +90,12 @@ def get_users_cnode( """ ) users = session.execute( - users_res, { - "cnode_endpoint_string": cnode_endpoint_string, - "prev_user_id": prev_user_id, - "max_users": max_users - } + users_res, + { + "cnode_endpoint_string": cnode_endpoint_string, + "prev_user_id": prev_user_id, + "max_users": max_users, + }, ).fetchall() users_dict = [dict(row) for row in users] return users_dict From 7bf282d2960b8d79a2e16651c9525063085a8989 Mon Sep 17 00:00:00 2001 From: Theo Ilie Date: Thu, 12 May 2022 21:13:10 +0000 Subject: [PATCH 4/4] Bump cache to 5min and cleanup --- discovery-provider/src/api/v1/users.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/discovery-provider/src/api/v1/users.py b/discovery-provider/src/api/v1/users.py index 9a9438821b4..f21f9d1f8a2 100644 --- a/discovery-provider/src/api/v1/users.py +++ b/discovery-provider/src/api/v1/users.py @@ -864,15 +864,13 @@ class UsersByContentNode(Resource): responses={200: "Success", 400: "Bad request", 500: "Server error"}, ) @full_ns.marshal_with(users_by_content_node_response) - @cache(ttl_sec=30) + @cache(ttl_sec=5 * 60) def get(self, replica_type): args = users_by_content_node_route_parser.parse_args() # Endpoint that a user's primary/secondary/either must be set to for them to be included in the results - cnode_url = args.get("content_node_endpoint") or args.get( - "creator_node_endpoint" - ) - # Offset after sorting user_id in ascending order (using > comparison in SQL query) + cnode_url = args.get("creator_node_endpoint") + # Used for pagination with ">" comparison in SQL query. See https://ivopereira.net/efficient-pagination-dont-use-offset-limit prev_user_id = args.get("prev_user_id") # LIMIT used in SQL query max_users = args.get("max_users")