Compare commits
No commits in common. "38513e9c153766cad3ad075d1c85ccd74c8963ff" and "3e003bddb73aaf4243c035b627ecb662d931c833" have entirely different histories.
38513e9c15
...
3e003bddb7
@ -85,16 +85,6 @@ class Base(Configuration):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
CACHES = {
|
|
||||||
"default": {
|
|
||||||
"BACKEND": "django.core.cache.backends.locmem.LocMemCache",
|
|
||||||
},
|
|
||||||
"database": {
|
|
||||||
"BACKEND": "django.core.cache.backends.db.DatabaseCache",
|
|
||||||
"LOCATION": "django_cache",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
|
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
|
||||||
|
|
||||||
WSGI_APPLICATION = "cmsmanage.wsgi.application"
|
WSGI_APPLICATION = "cmsmanage.wsgi.application"
|
||||||
@ -167,7 +157,6 @@ class Base(Configuration):
|
|||||||
"error_reporter": {"admin_email": {}},
|
"error_reporter": {"admin_email": {}},
|
||||||
"ack_failures": True,
|
"ack_failures": True,
|
||||||
"max_attempts": 1,
|
"max_attempts": 1,
|
||||||
"cache": "database",
|
|
||||||
"ALT_CLUSTERS": {
|
"ALT_CLUSTERS": {
|
||||||
"internal": {
|
"internal": {
|
||||||
"retry": 60 * 60,
|
"retry": 60 * 60,
|
||||||
|
@ -25,8 +25,6 @@ from rest_framework import routers
|
|||||||
from membershipworks.api import router as membershipworks_router
|
from membershipworks.api import router as membershipworks_router
|
||||||
from paperwork.api import router as paperwork_router
|
from paperwork.api import router as paperwork_router
|
||||||
|
|
||||||
from . import views
|
|
||||||
|
|
||||||
router = routers.DefaultRouter()
|
router = routers.DefaultRouter()
|
||||||
router.registry.extend(paperwork_router.registry)
|
router.registry.extend(paperwork_router.registry)
|
||||||
router.registry.extend(membershipworks_router.registry)
|
router.registry.extend(membershipworks_router.registry)
|
||||||
@ -61,7 +59,6 @@ urlpatterns = [
|
|||||||
),
|
),
|
||||||
),
|
),
|
||||||
path("api-auth/", include("rest_framework.urls")),
|
path("api-auth/", include("rest_framework.urls")),
|
||||||
path("healthcheck", views.healthcheck),
|
|
||||||
# path("markdownx/", include("markdownx.urls")),
|
# path("markdownx/", include("markdownx.urls")),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -1,86 +0,0 @@
|
|||||||
import dataclasses
|
|
||||||
import datetime
|
|
||||||
from collections.abc import Iterable
|
|
||||||
|
|
||||||
from django.http import HttpRequest, HttpResponse
|
|
||||||
from django.utils import timezone
|
|
||||||
|
|
||||||
from django_q.conf import Conf
|
|
||||||
from django_q.models import Schedule, Task
|
|
||||||
from django_q.status import Stat
|
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass
class CheckResult:
    """Outcome of a single health check."""

    # Human-readable description of what was checked and what was found.
    message: str
    # True when the check passed, False when it failed.
    ok: bool


@dataclasses.dataclass
class CheckResultOk(CheckResult):
    """A passing check result: ``ok`` defaults to ``True``.

    ``message`` is inherited from ``CheckResult``, so the positional
    constructor form ``CheckResultOk("text")`` keeps working.
    """

    ok: bool = True


@dataclasses.dataclass
class CheckResultFailure(CheckResult):
    """A failing check result: ``ok`` defaults to ``False``.

    ``message`` is inherited from ``CheckResult``, so the positional
    constructor form ``CheckResultFailure("text")`` keeps working.
    """

    ok: bool = False
|
|
||||||
|
|
||||||
|
|
||||||
def _check_clusters() -> CheckResult:
    """Report whether every known django-q cluster is idle or working.

    Returns:
        A failure result when ``Stat.get_all()`` reports no clusters, or when
        at least one cluster has a status other than ``Conf.IDLE`` or
        ``Conf.WORKING``; otherwise an OK result. Apart from the "no clusters"
        case, the message has the form ``"<up>/<total> clusters up"``.
    """
    clusters = Stat.get_all()
    if not clusters:
        return CheckResultFailure("No clusters running!")

    happy_count = sum(
        1 for cluster in clusters if cluster.status in (Conf.IDLE, Conf.WORKING)
    )
    summary = f"{happy_count}/{len(clusters)} clusters up"

    if happy_count != len(clusters):
        return CheckResultFailure(summary)
    return CheckResultOk(summary)
|
|
||||||
|
|
||||||
|
|
||||||
def _check_tasks() -> Iterable[CheckResult]:
    """Yield one check result per ``Schedule`` row.

    For each schedule the outcome is, in order of precedence:

    * failure if ``schedule.task`` is empty (the schedule has never run);
    * failure if no ``Task`` exists with the id stored in ``schedule.task``;
    * failure if that task's ``success`` flag is falsy;
    * failure if ``schedule.next_run`` is more than two hours in the past;
    * OK otherwise.
    """
    now = timezone.now()
    # A schedule whose next run is overdue by more than this is reported stale.
    stale_after = datetime.timedelta(hours=2)

    for schedule in Schedule.objects.all():
        if not schedule.task:
            yield CheckResultFailure(f"Scheduled task {schedule} has never run!")
            continue

        try:
            task = Task.objects.get(id=schedule.task)
        except Task.DoesNotExist:
            yield CheckResultFailure(
                f"Scheduled task {schedule}'s last task doesn't exist!"
            )
            continue

        # NOTE(review): next_run is presumably nullable in django-q's Schedule
        # model -- confirm. With no next-run timestamp there is nothing to be
        # overdue against, so the staleness test is skipped rather than
        # raising TypeError on ``now - None``.
        overdue = (
            schedule.next_run is not None and now - schedule.next_run > stale_after
        )

        if not task.success:
            yield CheckResultFailure(
                f"Scheduled task {schedule} failed at {task.started}"
            )
        elif overdue:
            yield CheckResultFailure(
                f"Scheduled task {schedule} stale, last run at {task.started}"
            )
        else:
            yield CheckResultOk(
                f"Scheduled task {schedule} ok, last run at {task.started}"
            )
|
|
||||||
|
|
||||||
|
|
||||||
def healthcheck(request: HttpRequest):
    """Run every health check and summarise the results as plain text.

    The response body is ``"OK: "`` or ``"CRITICAL: "`` followed by one check
    message per line, with failing checks listed before passing ones. The
    status code is 200 when every check passed and 500 otherwise.
    """
    results: list[CheckResult] = [_check_clusters()]
    results.extend(_check_tasks())

    healthy = all(result.ok for result in results)

    # False sorts before True, so failures come first; sorted() is stable, so
    # checks with the same outcome keep their original relative order.
    ordered = sorted(results, key=lambda result: result.ok)
    prefix = "OK: " if healthy else "CRITICAL: "
    body = prefix + "\n".join(result.message for result in ordered)

    return HttpResponse(
        body,
        content_type="text/plain",
        status=200 if healthy else 500,
    )
|
|
Loading…
Reference in New Issue
Block a user