summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Andrea Restle-Lay <arestlel@redhat.com>, 2024-12-18 23:12:52 +0100
committer: GitHub <noreply@github.com>, 2024-12-18 23:12:52 +0100
commit: 1b418f75e6346286ceed7f44ce4f81b46ff8e012 (patch)
tree: 90c889237c0b5defb801dcf46a14f494b4e42d04
parent: Cleanup in-memory data from test that randomly causes other failures (#15716) (diff)
download: awx-1b418f75e6346286ceed7f44ce4f81b46ff8e012.tar.xz
          awx-1b418f75e6346286ceed7f44ce4f81b46ff8e012.zip
AAP-36604 (analytics) Thousands of zombie/orphaned Slow/Stuck DB queries in controller querying active host count (#15715)
* lint
* change timeout to 5 minutes
* change timeout to 5 minutes
-rw-r--r-- awx/main/tasks/host_metrics.py 49
1 files changed, 31 insertions, 18 deletions
diff --git a/awx/main/tasks/host_metrics.py b/awx/main/tasks/host_metrics.py
index e5f1263ad1..9e163cfa73 100644
--- a/awx/main/tasks/host_metrics.py
+++ b/awx/main/tasks/host_metrics.py
@@ -11,6 +11,8 @@ from awx.main.dispatch.publish import task
from awx.main.models.inventory import HostMetric, HostMetricSummaryMonthly
from awx.main.tasks.helpers import is_run_threshold_reached
from awx.conf.license import get_license
+from awx.main.utils.pglock import advisory_lock
+
logger = logging.getLogger('awx.main.tasks.host_metrics')
@@ -90,7 +92,10 @@ class HostMetricTask:
class HostMetricSummaryMonthlyTask:
+ LOCK_KEY = 'host_metric_summary_monthly'
+ LOCK_SESSION_TIMEOUT = 300000 # 5 minutes.
"""
+ Task runs every four hours, longer lock timeout avoids premature termination due to high db load or other latency.
This task computes last [threshold] months of HostMetricSummaryMonthly table
[threshold] is setting CLEANUP_HOST_METRICS_HARD_THRESHOLD
Each record in the table represents changes in HostMetric table in one month
@@ -115,29 +120,37 @@ class HostMetricSummaryMonthlyTask:
self.records_to_update = []
def execute(self):
- self._load_existing_summaries()
- self._load_hosts_added()
- self._load_hosts_deleted()
- # Get first month after last hard delete
- month = self._get_first_month()
- license_consumed = self._get_license_consumed_before(month)
+ with advisory_lock(
+ HostMetricSummaryMonthlyTask.LOCK_KEY, lock_session_timeout_milliseconds=HostMetricSummaryMonthlyTask.LOCK_SESSION_TIMEOUT, wait=False
+ ) as acquired:
+ if not acquired:
+ logger.info("Another instance of host_metric_summary_monthly is already running. Exiting.")
+ return
+
+ self._load_existing_summaries()
+ self._load_hosts_added()
+ self._load_hosts_deleted()
+
+ # Get first month after last hard delete
+ month = self._get_first_month()
+ license_consumed = self._get_license_consumed_before(month)
- # Fill record for each month
- while month <= datetime.date.today().replace(day=1):
- summary = self._find_or_create_summary(month)
- # Update summary and update license_consumed by hosts added/removed this month
- self._update_summary(summary, month, license_consumed)
- license_consumed = summary.license_consumed
+ # Fill record for each month
+ while month <= datetime.date.today().replace(day=1):
+ summary = self._find_or_create_summary(month)
+ # Update summary and update license_consumed by hosts added/removed this month
+ self._update_summary(summary, month, license_consumed)
+ license_consumed = summary.license_consumed
- month = month + relativedelta(months=1)
+ month = month + relativedelta(months=1)
- # Create/Update stats
- HostMetricSummaryMonthly.objects.bulk_create(self.records_to_create, batch_size=1000)
- HostMetricSummaryMonthly.objects.bulk_update(self.records_to_update, ['license_consumed', 'hosts_added', 'hosts_deleted'], batch_size=1000)
+ # Create/Update stats
+ HostMetricSummaryMonthly.objects.bulk_create(self.records_to_create, batch_size=1000)
+ HostMetricSummaryMonthly.objects.bulk_update(self.records_to_update, ['license_consumed', 'hosts_added', 'hosts_deleted'], batch_size=1000)
- # Set timestamp of last run
- settings.HOST_METRIC_SUMMARY_TASK_LAST_TS = now()
+ # Set timestamp of last run
+ settings.HOST_METRIC_SUMMARY_TASK_LAST_TS = now()
def _get_license_consumed_before(self, month):
license_consumed = 0