healthcheck: Don't report error for tasks that are probably running
This commit is contained in:
parent 4404223350
commit 4d66f76a02
@@ -42,35 +42,43 @@ def _check_clusters() -> CheckResult:
|
|||||||
return CheckResultOk(f"{len(happy_clusters)}/{len(clusters)} clusters up")
|
return CheckResultOk(f"{len(happy_clusters)}/{len(clusters)} clusters up")
|
||||||
|
|
||||||
|
|
||||||
def _check_tasks() -> Iterable[CheckResult]:
|
def _check_task(now: datetime.datetime, schedule: Schedule) -> CheckResult:
|
||||||
now = timezone.now()
|
|
||||||
for schedule in Schedule.objects.all():
|
|
||||||
if not schedule.task:
|
if not schedule.task:
|
||||||
yield CheckResultFailure(f"Scheduled task {schedule} has never run!")
|
return CheckResultFailure(f"Scheduled task {schedule} has never run!")
|
||||||
|
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
task = Task.objects.get(id=schedule.task)
|
task = Task.objects.get(id=schedule.task)
|
||||||
except Task.DoesNotExist:
|
except Task.DoesNotExist:
|
||||||
yield CheckResultFailure(
|
if now - schedule.next_run > datetime.timedelta(minutes=5):
|
||||||
f"Scheduled task {schedule}'s last task doesn't exist!"
|
return CheckResultFailure(
|
||||||
|
f"Scheduled task {schedule}'s last task doesn't exist, and is probably not still running!"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
return CheckResultOk(
|
||||||
|
f"Schedule {schedule} has no task, but probably running now"
|
||||||
)
|
)
|
||||||
continue
|
|
||||||
|
|
||||||
if not task.success:
|
if not task.success:
|
||||||
yield CheckResultFailure(
|
return CheckResultFailure(
|
||||||
f"Scheduled task {schedule} failed at {task.started}"
|
f"Scheduled task {schedule} failed at {task.started}"
|
||||||
)
|
)
|
||||||
elif now - schedule.next_run > datetime.timedelta(hours=2):
|
elif now - schedule.next_run > datetime.timedelta(hours=2):
|
||||||
yield CheckResultFailure(
|
return CheckResultFailure(
|
||||||
f"Scheduled task {schedule} stale, last run at {task.started}"
|
f"Scheduled task {schedule} stale, last run at {task.started}"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
yield CheckResultOk(
|
return CheckResultOk(
|
||||||
f"Scheduled task {schedule} ok, last run at {task.started}"
|
f"Scheduled task {schedule} ok, last run at {task.started}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _check_tasks() -> Iterable[CheckResult]:
|
||||||
|
now = timezone.now()
|
||||||
|
for schedule in Schedule.objects.all():
|
||||||
|
yield _check_task(now, schedule)
|
||||||
|
|
||||||
|
|
||||||
def healthcheck(request: HttpRequest):
|
def healthcheck(request: HttpRequest):
|
||||||
checks: list[CheckResult] = [_check_clusters(), *_check_tasks()]
|
checks: list[CheckResult] = [_check_clusters(), *_check_tasks()]
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user