healthcheck: Don't report error for tasks that are probably running
All checks were successful
Ruff / ruff (push) Successful in 28s
Test / test (push) Successful in 4m38s

This commit is contained in:
Adam Goldsmith 2024-05-13 21:43:40 -04:00
parent 4404223350
commit 4d66f76a02

View File

@@ -42,35 +42,43 @@ def _check_clusters() -> CheckResult:
return CheckResultOk(f"{len(happy_clusters)}/{len(clusters)} clusters up") return CheckResultOk(f"{len(happy_clusters)}/{len(clusters)} clusters up")
def _check_tasks() -> Iterable[CheckResult]: def _check_task(now: datetime.datetime, schedule: Schedule) -> CheckResult:
now = timezone.now()
for schedule in Schedule.objects.all():
if not schedule.task: if not schedule.task:
yield CheckResultFailure(f"Scheduled task {schedule} has never run!") return CheckResultFailure(f"Scheduled task {schedule} has never run!")
else: else:
try: try:
task = Task.objects.get(id=schedule.task) task = Task.objects.get(id=schedule.task)
except Task.DoesNotExist: except Task.DoesNotExist:
yield CheckResultFailure( if now - schedule.next_run > datetime.timedelta(minutes=5):
f"Scheduled task {schedule}'s last task doesn't exist!" return CheckResultFailure(
f"Scheduled task {schedule}'s last task doesn't exist, and is probably not still running!"
)
else:
return CheckResultOk(
f"Schedule {schedule} has no task, but probably running now"
) )
continue
if not task.success: if not task.success:
yield CheckResultFailure( return CheckResultFailure(
f"Scheduled task {schedule} failed at {task.started}" f"Scheduled task {schedule} failed at {task.started}"
) )
elif now - schedule.next_run > datetime.timedelta(hours=2): elif now - schedule.next_run > datetime.timedelta(hours=2):
yield CheckResultFailure( return CheckResultFailure(
f"Scheduled task {schedule} stale, last run at {task.started}" f"Scheduled task {schedule} stale, last run at {task.started}"
) )
else: else:
yield CheckResultOk( return CheckResultOk(
f"Scheduled task {schedule} ok, last run at {task.started}" f"Scheduled task {schedule} ok, last run at {task.started}"
) )
def _check_tasks() -> Iterable[CheckResult]:
now = timezone.now()
for schedule in Schedule.objects.all():
yield _check_task(now, schedule)
def healthcheck(request: HttpRequest): def healthcheck(request: HttpRequest):
checks: list[CheckResult] = [_check_clusters(), *_check_tasks()] checks: list[CheckResult] = [_check_clusters(), *_check_tasks()]