healthcheck: Don't report error for tasks that are probably running
This commit is contained in:
parent
4404223350
commit
4d66f76a02
@ -42,33 +42,41 @@ def _check_clusters() -> CheckResult:
|
||||
return CheckResultOk(f"{len(happy_clusters)}/{len(clusters)} clusters up")
|
||||
|
||||
|
||||
def _check_task(now: datetime.datetime, schedule: Schedule) -> CheckResult:
    """Evaluate the health of a single schedule at the instant ``now``.

    The result is decided in this order:

    * ``schedule.task`` is falsy: failure ("has never run").
    * No ``Task`` row has ``id == schedule.task``: OK if ``now`` is at most
      five minutes past ``schedule.next_run`` (the run is presumed to still
      be in progress), otherwise failure.
    * The task exists with a falsy ``success``: failure.
    * The task succeeded but ``now`` is more than two hours past
      ``schedule.next_run``: failure ("stale").
    * Anything else: OK.

    Args:
        now: Reference time that ``schedule.next_run`` is compared against.
        schedule: The schedule to inspect.

    Returns:
        A ``CheckResultOk`` or ``CheckResultFailure`` describing the schedule.
    """
    if not schedule.task:
        return CheckResultFailure(f"Scheduled task {schedule} has never run!")

    try:
        last_task = Task.objects.get(id=schedule.task)
    except Task.DoesNotExist:
        # A missing task record shortly after the scheduled time is taken to
        # mean the run has not finished yet, so it is not reported as an error.
        running_grace = datetime.timedelta(minutes=5)
        if now - schedule.next_run <= running_grace:
            return CheckResultOk(
                f"Schedule {schedule} has no task, but probably running now"
            )
        return CheckResultFailure(
            f"Scheduled task {schedule}'s last task doesn't exist, and is probably not still running!"
        )

    if not last_task.success:
        return CheckResultFailure(
            f"Scheduled task {schedule} failed at {last_task.started}"
        )

    stale_after = datetime.timedelta(hours=2)
    if now - schedule.next_run > stale_after:
        return CheckResultFailure(
            f"Scheduled task {schedule} stale, last run at {last_task.started}"
        )

    return CheckResultOk(
        f"Scheduled task {schedule} ok, last run at {last_task.started}"
    )
|
||||
|
||||
|
||||
def _check_tasks() -> Iterable[CheckResult]:
    """Yield exactly one health-check result for every ``Schedule`` row.

    Each schedule's verdict comes solely from ``_check_task``. The checks are
    not repeated inline here, because an inline copy would emit a second
    result per schedule and would report a missing task record as a failure
    even while the run is probably still in progress.

    Yields:
        The ``CheckResult`` returned by ``_check_task`` for each schedule.
    """
    # Take the timestamp once so every schedule is judged against the same
    # instant.
    now = timezone.now()
    for schedule in Schedule.objects.all():
        yield _check_task(now, schedule)
|
||||
|
||||
|
||||
def healthcheck(request: HttpRequest):
|
||||
|
Loading…
Reference in New Issue
Block a user