-
Notifications
You must be signed in to change notification settings - Fork 76
LCORE-1062: reconnection logic in quota runner #921
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,7 +27,11 @@ | |
|
|
||
| logger = get_logger(__name__) | ||
|
|
||
| DATABASE_RECONNECTION_COUNT: int = 10 | ||
| RECONNECTION_DELAY: int = 1 | ||
|
|
||
|
|
||
| # pylint: disable=R0912 | ||
| def quota_scheduler(config: QuotaHandlersConfiguration) -> bool: | ||
| """ | ||
| Run the quota scheduler loop that applies configured quota limiters periodically. | ||
|
|
@@ -55,8 +59,15 @@ def quota_scheduler(config: QuotaHandlersConfiguration) -> bool: | |
| logger.warning("No limiters are setup, skipping") | ||
| return False | ||
|
|
||
| connection = connect(config) | ||
| if connection is None: | ||
| for _ in range(DATABASE_RECONNECTION_COUNT): | ||
| try: | ||
| connection = connect(config) | ||
| if connection is not None: | ||
| break | ||
| except Exception as e: # pylint: disable=broad-exception-caught | ||
| logger.warning("Can not connect to database, will try later: %s", e) | ||
| sleep(RECONNECTION_DELAY) | ||
| else: | ||
| logger.warning("Can not connect to database, skipping") | ||
| return False | ||
|
|
||
|
|
@@ -83,6 +94,16 @@ def quota_scheduler(config: QuotaHandlersConfiguration) -> bool: | |
| logger.info("Quota scheduler sync started") | ||
| for limiter in config.limiters: | ||
| try: | ||
| if not connected(connection): | ||
| # the old connection might be closed to avoid resource leaks | ||
| try: | ||
| connection.close() | ||
| except Exception: # pylint: disable=broad-exception-caught | ||
| pass # Connection already dead | ||
| connection = connect(config) | ||
| if connection is None: | ||
| logger.warning("Can not connect to database, skipping") | ||
| continue | ||
|
Comment on lines
+97
to
+106
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reinitialize tables after reconnecting to database. When the connection is lost and re-established, the quota table may not exist (e.g., if the database was recreated or wiped). Without calling `init_tables` after a successful reconnect, the subsequent `quota_revocation` call may fail against a missing table. Apply this diff to ensure tables are initialized after reconnection: if not connected(connection):
# the old connection might be closed to avoid resource leaks
try:
connection.close()
except Exception: # pylint: disable=broad-exception-caught
pass # Connection already dead
connection = connect(config)
if connection is None:
logger.warning("Can not connect to database, skipping")
continue
+ # Reinitialize tables after reconnection
+ if create_quota_table is not None:
+ init_tables(connection, create_quota_table)
quota_revocation(
connection, limiter, increase_quota_statement, reset_quota_statement
)🤖 Prompt for AI Agents |
||
| quota_revocation( | ||
| connection, limiter, increase_quota_statement, reset_quota_statement | ||
| ) | ||
|
|
@@ -95,6 +116,30 @@ def quota_scheduler(config: QuotaHandlersConfiguration) -> bool: | |
| return True | ||
|
|
||
|
|
||
| def connected(connection: Any) -> bool: | ||
| """Check if DB is still connected. | ||
|
|
||
| Parameters: | ||
| connection: Database connection object to verify. | ||
|
|
||
| Returns: | ||
| bool: True if connection is active, False otherwise. | ||
| """ | ||
| if connection is None: | ||
| logger.warning("Not connected, need to reconnect later") | ||
| return False | ||
| try: | ||
| # for compatibility with SQLite it is not possible to use context manager there | ||
| cursor = connection.cursor() | ||
| cursor.execute("SELECT 1") | ||
| cursor.close() | ||
| logger.info("Connection to storage is ok") | ||
| return True | ||
| except Exception as e: # pylint: disable=broad-exception-caught | ||
| logger.error("Disconnected from storage: %s", e) | ||
| return False | ||
|
|
||
|
|
||
| def get_increase_quota_statement(config: QuotaHandlersConfiguration) -> str: | ||
| """ | ||
| Select the SQL statement used to increase stored quota according to the database backend. | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Optimize retry loop to avoid unnecessary final delay.
The current loop sleeps after each attempt, including the final failed one. This adds an unnecessary 1-second delay before returning.
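Consider moving the sleep to the beginning of the loop body and skipping it on the first iteration, so that a delay only ever precedes a retry and never follows the final failed attempt.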
🤖 Prompt for AI Agents