diff options
author | Andrea Restle-Lay <arestlel@redhat.com> | 2024-12-18 23:12:52 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-12-18 23:12:52 +0100 |
commit | 1b418f75e6346286ceed7f44ce4f81b46ff8e012 (patch) | |
tree | 90c889237c0b5defb801dcf46a14f494b4e42d04 | |
parent | Cleanup in-memory data from test that randomly causes other failures (#15716) (diff) | |
download | awx-1b418f75e6346286ceed7f44ce4f81b46ff8e012.tar.xz awx-1b418f75e6346286ceed7f44ce4f81b46ff8e012.zip |
AAP-36604 (analytics) Thousands of zombie/orphaned Slow/Stuck DB queries in controller querying active host count (#15715)
* lint
* change timeout to 5 minutes
* change timeout to 5 minutes
-rw-r--r-- | awx/main/tasks/host_metrics.py | 49 |
1 files changed, 31 insertions, 18 deletions
diff --git a/awx/main/tasks/host_metrics.py b/awx/main/tasks/host_metrics.py index e5f1263ad1..9e163cfa73 100644 --- a/awx/main/tasks/host_metrics.py +++ b/awx/main/tasks/host_metrics.py @@ -11,6 +11,8 @@ from awx.main.dispatch.publish import task from awx.main.models.inventory import HostMetric, HostMetricSummaryMonthly from awx.main.tasks.helpers import is_run_threshold_reached from awx.conf.license import get_license +from awx.main.utils.pglock import advisory_lock + logger = logging.getLogger('awx.main.tasks.host_metrics') @@ -90,7 +92,10 @@ class HostMetricTask: class HostMetricSummaryMonthlyTask: + LOCK_KEY = 'host_metric_summary_monthly' + LOCK_SESSION_TIMEOUT = 300000 # 5 minutes. """ + Task runs every four hours, longer lock timeout avoids premature termination due to high db load or other latency. This task computes last [threshold] months of HostMetricSummaryMonthly table [threshold] is setting CLEANUP_HOST_METRICS_HARD_THRESHOLD Each record in the table represents changes in HostMetric table in one month @@ -115,29 +120,37 @@ class HostMetricSummaryMonthlyTask: self.records_to_update = [] def execute(self): - self._load_existing_summaries() - self._load_hosts_added() - self._load_hosts_deleted() - # Get first month after last hard delete - month = self._get_first_month() - license_consumed = self._get_license_consumed_before(month) + with advisory_lock( + HostMetricSummaryMonthlyTask.LOCK_KEY, lock_session_timeout_milliseconds=HostMetricSummaryMonthlyTask.LOCK_SESSION_TIMEOUT, wait=False + ) as acquired: + if not acquired: + logger.info("Another instance of host_metric_summary_monthly is already running. Exiting.") + return + + self._load_existing_summaries() + self._load_hosts_added() + self._load_hosts_deleted() + + # Get first month after last hard delete + month = self._get_first_month() + license_consumed = self._get_license_consumed_before(month) - # Fill record for each month - while month <= datetime.date.today().replace(day=1): - summary = self._find_or_create_summary(month) - # Update summary and update license_consumed by hosts added/removed this month - self._update_summary(summary, month, license_consumed) - license_consumed = summary.license_consumed + # Fill record for each month + while month <= datetime.date.today().replace(day=1): + summary = self._find_or_create_summary(month) + # Update summary and update license_consumed by hosts added/removed this month + self._update_summary(summary, month, license_consumed) + license_consumed = summary.license_consumed - month = month + relativedelta(months=1) + month = month + relativedelta(months=1) - # Create/Update stats - HostMetricSummaryMonthly.objects.bulk_create(self.records_to_create, batch_size=1000) - HostMetricSummaryMonthly.objects.bulk_update(self.records_to_update, ['license_consumed', 'hosts_added', 'hosts_deleted'], batch_size=1000) + # Create/Update stats + HostMetricSummaryMonthly.objects.bulk_create(self.records_to_create, batch_size=1000) + HostMetricSummaryMonthly.objects.bulk_update(self.records_to_update, ['license_consumed', 'hosts_added', 'hosts_deleted'], batch_size=1000) - # Set timestamp of last run - settings.HOST_METRIC_SUMMARY_TASK_LAST_TS = now() + # Set timestamp of last run + settings.HOST_METRIC_SUMMARY_TASK_LAST_TS = now() def _get_license_consumed_before(self, month): license_consumed = 0 |