From 15e2d6d1a9a216e75fcbc4d464512fa31f7020df Mon Sep 17 00:00:00 2001 From: Athul Date: Wed, 4 Mar 2026 20:17:20 +0530 Subject: [PATCH 1/6] [MISC] Add --active-only filter to backfill_metrics command Allow filtering organizations during backfill to only process those with active status. Adds graceful fallback for OSS mode. Also improves progress output with [n/N] counter, extracts org resolution into _resolve_org_ids(), and fixes logger formatting to use lazy %s instead of f-strings. Co-Authored-By: Claude Opus 4.6 --- .../management/commands/backfill_metrics.py | 98 +++++++++++++++---- 1 file changed, 81 insertions(+), 17 deletions(-) diff --git a/backend/dashboard_metrics/management/commands/backfill_metrics.py b/backend/dashboard_metrics/management/commands/backfill_metrics.py index 2c8287ab60..c62771a372 100644 --- a/backend/dashboard_metrics/management/commands/backfill_metrics.py +++ b/backend/dashboard_metrics/management/commands/backfill_metrics.py @@ -7,6 +7,7 @@ python manage.py backfill_metrics --days=30 python manage.py backfill_metrics --days=90 --org-id=12 python manage.py backfill_metrics --days=7 --dry-run + python manage.py backfill_metrics --days=90 --active-only """ import logging @@ -27,6 +28,14 @@ logger = logging.getLogger(__name__) +# Cloud-only: Subscription model for --subscribed-only filtering +try: + from subscription_v2.models import Subscription + + HAS_SUBSCRIPTION = True +except ImportError: + HAS_SUBSCRIPTION = False + class Command(BaseCommand): help = "Backfill metrics from source tables into aggregated tables" @@ -84,6 +93,14 @@ def add_arguments(self, parser): action="store_true", help="Skip monthly aggregation", ) + parser.add_argument( + "--active-only", + action="store_true", + help=( + "Only process orgs with an active subscription " + "(cloud-only, requires subscription_v2)" + ), + ) def handle(self, *args, **options): days = options["days"] @@ -92,6 +109,7 @@ def handle(self, *args, **options): skip_hourly = options["skip_hourly"] skip_daily = options["skip_daily"] skip_monthly = options["skip_monthly"] + active_only = options["active_only"] end_date = timezone.now() start_date = end_date - timedelta(days=days) @@ -103,16 +121,16 @@ def handle(self, *args, **options): self.stdout.write(self.style.WARNING("DRY RUN - no changes will be made")) # Get organizations to process - if org_id: - try: - orgs = [Organization.objects.get(id=org_id)] - self.stdout.write(f"Processing single org: {org_id}") - except Organization.DoesNotExist: - self.stderr.write(self.style.ERROR(f"Organization {org_id} not found")) - return - else: - orgs = list(Organization.objects.all()) - self.stdout.write(f"Processing {len(orgs)} organizations") + org_ids = self._resolve_org_ids( + org_id=org_id, + active_only=active_only, + ) + + if not org_ids: + self.stdout.write(self.style.WARNING("No organizations to process")) + return + + self.stdout.write(f"Processing {len(org_ids)} organizations") total_stats = { "hourly": {"upserted": 0}, @@ -121,14 +139,15 @@ def handle(self, *args, **options): "errors": 0, } - for org in orgs: - org_id_str = str(org.id) - self.stdout.write(f"\nProcessing org: {org.display_name} ({org_id_str})") + for i, current_org_id in enumerate(org_ids): + self.stdout.write( + f"\n[{i + 1}/{len(org_ids)}] Processing org: {current_org_id}" + ) try: # Collect all metric data for this org hourly_data, daily_data, monthly_data = self._collect_metrics( - org_id_str, start_date, end_date + current_org_id, start_date, end_date ) self.stdout.write( @@ -155,10 +174,10 @@ def handle(self, *args, **options): except Exception as e: self.stderr.write( - self.style.ERROR(f" Error processing org {org_id_str}: {e}") + self.style.ERROR(f" Error processing org {current_org_id}: {e}") ) total_stats["errors"] += 1 - logger.exception(f"Error backfilling org {org_id_str}") + logger.exception("Error backfilling org %s", current_org_id) # Print summary self.stdout.write("\n" + "=" * 50) @@ -168,6 +187,51 @@ def handle(self, *args, **options): self.stdout.write(f"Monthly: {total_stats['monthly']['upserted']} upserted") self.stdout.write(f"Errors: {total_stats['errors']}") + def _resolve_org_ids( + self, + org_id: str | None, + active_only: bool, + ) -> list[str]: + """Resolve the list of organization IDs to process. + + Applies filters in order: + 1. Single org (--org-id) — returns immediately + 2. Active subscription filter (--active-only) — cloud only + """ + # Single org mode + if org_id: + if not Organization.objects.filter(id=org_id).exists(): + self.stderr.write(self.style.ERROR(f"Organization {org_id} not found")) + return [] + self.stdout.write(f"Single org mode: {org_id}") + return [org_id] + + # Start with all org IDs + all_org_ids = set(Organization.objects.values_list("id", flat=True)) + self.stdout.write(f"Total organizations: {len(all_org_ids)}") + + # Filter: Active subscription (cloud only) + if active_only: + if not HAS_SUBSCRIPTION: + self.stdout.write( + self.style.WARNING( + "subscription_v2 not available (OSS mode), ignoring --active-only" + ) + ) + else: + active_sub_org_ids = set( + Subscription.objects.filter(is_active=True).values_list( + "organization_id", flat=True + ) + ) + all_org_ids &= active_sub_org_ids + self.stdout.write( + f"After active subscription filter: {len(all_org_ids)} orgs " + f"({len(active_sub_org_ids)} with active subscription)" + ) + + return sorted(str(oid) for oid in all_org_ids) + def _collect_metrics( self, org_id: str, start_date: datetime, end_date: datetime ) -> tuple[dict, dict, dict]: @@ -238,7 +302,7 @@ def _collect_metrics( monthly_agg[key]["count"] += 1 except Exception as e: - logger.warning(f"Error querying {metric_name} for org {org_id}: {e}") + logger.warning("Error querying %s for org %s: %s", metric_name, org_id, e) return hourly_agg, daily_agg, monthly_agg From 59d996901a18128243d8c42d04232cfa45e8e739 Mon Sep 17 00:00:00 2001 From: Athul Date: Wed, 4 Mar 2026 20:20:51 +0530 Subject: [PATCH 2/6] fix: correct subscription import path to pluggable_apps Co-Authored-By: Claude Opus 4.6 --- .../dashboard_metrics/management/commands/backfill_metrics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/dashboard_metrics/management/commands/backfill_metrics.py b/backend/dashboard_metrics/management/commands/backfill_metrics.py index c62771a372..29a4bd9c9d 100644 --- a/backend/dashboard_metrics/management/commands/backfill_metrics.py +++ b/backend/dashboard_metrics/management/commands/backfill_metrics.py @@ -28,9 +28,9 @@ logger = logging.getLogger(__name__) -# Cloud-only: Subscription model for --subscribed-only filtering +# Cloud-only: Subscription model for --active-only filtering try: - from subscription_v2.models import Subscription + from pluggable_apps.subscription_v2.models import Subscription HAS_SUBSCRIPTION = True except ImportError: From 0cdebe24db0ee2af033bc42fe82efd61db30a18d Mon Sep 17 00:00:00 2001 From: Athul Date: Thu, 5 Mar 2026 00:10:40 +0530 Subject: [PATCH 3/6] fix: use organization_id field instead of pk for org resolution Organization.id is the auto-increment int PK, but Subscription.organization_id and all service query methods use Organization.organization_id (the Auth0 org identifier string). The set intersection was always empty due to this type mismatch. Co-Authored-By: Claude Opus 4.6 --- .../management/commands/backfill_metrics.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/backend/dashboard_metrics/management/commands/backfill_metrics.py b/backend/dashboard_metrics/management/commands/backfill_metrics.py index 29a4bd9c9d..9fd94738c6 100644 --- a/backend/dashboard_metrics/management/commands/backfill_metrics.py +++ b/backend/dashboard_metrics/management/commands/backfill_metrics.py @@ -5,7 +5,7 @@ Usage: python manage.py backfill_metrics --days=30 - python manage.py backfill_metrics --days=90 --org-id=12 + python manage.py backfill_metrics --days=90 --org-id=org_abc123 python manage.py backfill_metrics --days=7 --dry-run python manage.py backfill_metrics --days=90 --active-only """ @@ -200,14 +200,16 @@ def _resolve_org_ids( """ # Single org mode if org_id: - if not Organization.objects.filter(id=org_id).exists(): + if not Organization.objects.filter(organization_id=org_id).exists(): self.stderr.write(self.style.ERROR(f"Organization {org_id} not found")) return [] self.stdout.write(f"Single org mode: {org_id}") return [org_id] - # Start with all org IDs - all_org_ids = set(Organization.objects.values_list("id", flat=True)) + # Start with all org IDs (organization_id is the Auth0 org identifier) + all_org_ids = set( + Organization.objects.values_list("organization_id", flat=True) + ) self.stdout.write(f"Total organizations: {len(all_org_ids)}") # Filter: Active subscription (cloud only) From b4e0dfde911462707b39756e6d46d44e2b75bbb0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 4 Mar 2026 18:41:11 +0000 Subject: [PATCH 4/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../dashboard_metrics/management/commands/backfill_metrics.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/backend/dashboard_metrics/management/commands/backfill_metrics.py b/backend/dashboard_metrics/management/commands/backfill_metrics.py index 9fd94738c6..87ded5bef2 100644 --- a/backend/dashboard_metrics/management/commands/backfill_metrics.py +++ b/backend/dashboard_metrics/management/commands/backfill_metrics.py @@ -207,9 +207,7 @@ def _resolve_org_ids( return [org_id] # Start with all org IDs (organization_id is the Auth0 org identifier) - all_org_ids = set( - Organization.objects.values_list("organization_id", flat=True) - ) + all_org_ids = set(Organization.objects.values_list("organization_id", flat=True)) self.stdout.write(f"Total organizations: {len(all_org_ids)}") # Filter: Active subscription (cloud only) From 812de006ccd5fc30107373360c8834d953ce5b0f Mon Sep 17 00:00:00 2001 From: Athul Date: Thu, 5 Mar 2026 08:40:13 +0530 Subject: [PATCH 5/6] fix: use Organization.id (int PK) instead of organization_id string Subscription.organization_id stores Auth0 org strings (org_*) while all downstream service queries and bulk upserts use Organization.id (int PK) via FK references. Map subscription org strings back to Organization PKs via organization_id__in lookup to ensure correct downstream behavior. Co-Authored-By: Claude Opus 4.6 --- .../management/commands/backfill_metrics.py | 59 +++++++++++-------- 1 file changed, 34 insertions(+), 25 deletions(-) diff --git a/backend/dashboard_metrics/management/commands/backfill_metrics.py b/backend/dashboard_metrics/management/commands/backfill_metrics.py index 87ded5bef2..49e973d23f 100644 --- a/backend/dashboard_metrics/management/commands/backfill_metrics.py +++ b/backend/dashboard_metrics/management/commands/backfill_metrics.py @@ -5,7 +5,7 @@ Usage: python manage.py backfill_metrics --days=30 - python manage.py backfill_metrics --days=90 --org-id=org_abc123 + python manage.py backfill_metrics --days=90 --org-id=5 python manage.py backfill_metrics --days=7 --dry-run python manage.py backfill_metrics --days=90 --active-only """ @@ -192,7 +192,11 @@ def _resolve_org_ids( org_id: str | None, active_only: bool, ) -> list[str]: - """Resolve the list of organization IDs to process. + """Resolve the list of organization PKs to process. + + Returns Organization.id (int PK) values as strings, since all + downstream queries (services, bulk upserts) use the FK which + references Organization.id, not Organization.organization_id. Applies filters in order: 1. Single org (--org-id) — returns immediately @@ -200,35 +204,40 @@ def _resolve_org_ids( """ # Single org mode if org_id: - if not Organization.objects.filter(organization_id=org_id).exists(): + org = Organization.objects.filter(id=org_id).first() + if not org: self.stderr.write(self.style.ERROR(f"Organization {org_id} not found")) return [] self.stdout.write(f"Single org mode: {org_id}") - return [org_id] - - # Start with all org IDs (organization_id is the Auth0 org identifier) - all_org_ids = set(Organization.objects.values_list("organization_id", flat=True)) - self.stdout.write(f"Total organizations: {len(all_org_ids)}") + return [str(org.id)] - # Filter: Active subscription (cloud only) - if active_only: - if not HAS_SUBSCRIPTION: - self.stdout.write( - self.style.WARNING( - "subscription_v2 not available (OSS mode), ignoring --active-only" - ) + # Get org PKs based on filtering mode + if active_only and HAS_SUBSCRIPTION: + active_org_id_strings = set( + Subscription.objects.filter(is_active=True).values_list( + "organization_id", flat=True ) - else: - active_sub_org_ids = set( - Subscription.objects.filter(is_active=True).values_list( - "organization_id", flat=True - ) - ) - all_org_ids &= active_sub_org_ids - self.stdout.write( - f"After active subscription filter: {len(all_org_ids)} orgs " - f"({len(active_sub_org_ids)} with active subscription)" + ) + # Map org_* strings back to Organization PKs + all_org_ids = set( + Organization.objects.filter( + organization_id__in=active_org_id_strings + ).values_list("id", flat=True) + ) + self.stdout.write( + f"Active organizations (subscription filter): {len(all_org_ids)}" + ) + elif active_only and not HAS_SUBSCRIPTION: + self.stdout.write( + self.style.WARNING( + "subscription_v2 not available (OSS mode), ignoring --active-only" ) + ) + all_org_ids = set(Organization.objects.values_list("id", flat=True)) + self.stdout.write(f"Total organizations: {len(all_org_ids)}") + else: + all_org_ids = set(Organization.objects.values_list("id", flat=True)) + self.stdout.write(f"Total organizations: {len(all_org_ids)}") return sorted(str(oid) for oid in all_org_ids) From 0d435110d5fed6a2bcbeb9351f61e9ac491f0952 Mon Sep 17 00:00:00 2001 From: Athul Date: Thu, 5 Mar 2026 08:56:05 +0530 Subject: [PATCH 6/6] fix: handle malformed --org-id input gracefully Wrap Organization PK lookup in try/except so non-integer values (e.g. --org-id=abc) show a friendly "not found" error instead of crashing with an unhandled ValueError. Co-Authored-By: Claude Opus 4.6 --- .../management/commands/backfill_metrics.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/backend/dashboard_metrics/management/commands/backfill_metrics.py b/backend/dashboard_metrics/management/commands/backfill_metrics.py index 49e973d23f..4c71fcd329 100644 --- a/backend/dashboard_metrics/management/commands/backfill_metrics.py +++ b/backend/dashboard_metrics/management/commands/backfill_metrics.py @@ -204,7 +204,10 @@ def _resolve_org_ids( """ # Single org mode if org_id: - org = Organization.objects.filter(id=org_id).first() + try: + org = Organization.objects.filter(id=org_id).first() + except (ValueError, TypeError): + org = None if not org: self.stderr.write(self.style.ERROR(f"Organization {org_id} not found")) return []