Add LAN-only status UI for caddy-autogen

This commit is contained in:
Joachim Friberg
2026-03-23 12:47:30 +01:00
parent 5b15a0aedd
commit 2346d5a096
9 changed files with 590 additions and 34 deletions
+271 -7
View File
@@ -4,10 +4,12 @@ import json
import os
import re
import sys
import threading
import time
import urllib.error
import urllib.parse
import urllib.request
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
TRUE_VALUES = {"1", "true", "yes", "on"}
@@ -116,7 +118,15 @@ def _build_fqdn(host_hint: str, base_domain: str) -> str:
return fqdn
def _collect_routes(docker_api_url: str, env_prefix: str, denylist: set, base_domain: str, default_scheme: str, default_path: str, default_health_uri: str):
def _collect_routes(
docker_api_url: str,
env_prefix: str,
denylist: set,
base_domain: str,
default_scheme: str,
default_path: str,
default_health_uri: str,
):
routes = []
containers = _get_json(f"{docker_api_url}/containers/json?all=0")
for c in containers:
@@ -175,7 +185,32 @@ def _collect_routes(docker_api_url: str, env_prefix: str, denylist: set, base_do
return routes
def _generate_caddyfile(routes, token: str, require_cloudflare: bool, allow_internal_tls_fallback: bool, wildcard_domain: str, cert_email: str):
def _append_status_site(out: list[str], status_ui_port: int, status_upstream: str) -> None:
    """Append the status-UI site block to the Caddyfile lines collected in *out*.

    The emitted block listens on ``:<status_ui_port>``. Clients matched by
    ``remote_ip private_ranges`` get ``/status.json`` reverse-proxied to
    *status_upstream* and everything else served from ``/srv/status``; all
    other clients receive ``403 forbidden``.

    Args:
        out: Caddyfile lines accumulated so far; extended in place.
        status_ui_port: Port of the generated site address.
        status_upstream: Upstream address handed to ``reverse_proxy``.
    """
    site_block = (
        f":{status_ui_port} {{",
        " @allowed remote_ip private_ranges",
        " handle @allowed {",
        " @status_json path /status.json",
        " handle @status_json {",
        f" reverse_proxy {status_upstream}",
        " }",
        " root * /srv/status",
        " file_server",
        " }",
        ' respond "forbidden" 403',
        "}",
        # Trailing blank line separates this site from whatever follows.
        "",
    )
    out.extend(site_block)
def _generate_caddyfile(
routes,
token: str,
require_cloudflare: bool,
allow_internal_tls_fallback: bool,
wildcard_domain: str,
cert_email: str,
status_ui_port: int,
status_upstream: str,
):
if require_cloudflare and not token:
raise RuntimeError("CLOUDFLARE_API_TOKEN is required in fail-closed mode")
@@ -189,6 +224,8 @@ def _generate_caddyfile(routes, token: str, require_cloudflare: bool, allow_inte
out.append("}")
out.append("")
_append_status_site(out, status_ui_port=status_ui_port, status_upstream=status_upstream)
if wildcard_domain and token:
out.append(f"{wildcard_domain}, *.{wildcard_domain} {{")
out.append(" tls {")
@@ -237,6 +274,147 @@ def _generate_caddyfile(routes, token: str, require_cloudflare: bool, allow_inte
return "\n".join(out)
def _parse_bind_addr(value: str, default_host: str = "0.0.0.0", default_port: int = 8089) -> tuple[str, int]:
    """Split a ``host:port`` bind string into a ``(host, port)`` tuple.

    Args:
        value: Bind address text; ``None``/empty/blank yields the defaults.
        default_host: Host used when *value* is blank or its host part is empty.
        default_port: Port used only when *value* is blank.

    Returns:
        The host string and the port as an ``int`` in ``1..65535``.

    Raises:
        ValueError: If there is no ``:`` separator, the port is not an
            integer, or the port is outside ``1..65535``.
    """
    text = str(value or "").strip()
    if not text:
        return default_host, default_port

    # Split on the LAST colon so only the final segment is read as the port.
    host_part, separator, port_part = text.rpartition(":")
    if not separator:
        raise ValueError(f"invalid bind address '{text}', expected host:port")

    try:
        port_number = int(port_part)
    except ValueError as exc:
        raise ValueError(f"invalid port in bind address '{text}'") from exc

    if not 1 <= port_number <= 65535:
        raise ValueError(f"port out of range in bind address '{text}'")

    # ":8089" style input keeps the default host.
    return host_part or default_host, port_number
def _verify_cloudflare_token(verify_url: str, token: str) -> dict:
    """Check *token* against the token-verify endpoint at *verify_url*.

    Transport and HTTP failures are never raised; they are folded into the
    returned status dict so the caller can publish it as-is.

    Args:
        verify_url: URL that is sent a GET request with the bearer token.
        token: API token; when empty no request is made.

    Returns:
        A dict with the keys ``reachable`` (a response was received),
        ``token_valid`` (the response body had a truthy ``success`` field),
        ``last_check_ts`` (Unix seconds when the check started) and ``error``
        (empty string on success, otherwise a short description).
    """
    now = int(time.time())

    def _result(reachable: bool, token_valid: bool, error: str) -> dict:
        # Single place that defines the shape of the status dict.
        return {
            "reachable": reachable,
            "token_valid": token_valid,
            "last_check_ts": now,
            "error": error,
        }

    if not token:
        return _result(False, False, "CLOUDFLARE_API_TOKEN is missing")

    req = urllib.request.Request(
        verify_url,
        headers={
            "Authorization": f"Bearer {token}",
            "Accept": "application/json",
        },
    )
    # Only the network exchange lives in this try block, so that a body which
    # arrives but cannot be parsed is not mistaken for a connection problem.
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            raw = resp.read()
    except urllib.error.HTTPError as exc:
        # The server answered, just not with a success status.
        return _result(True, False, f"HTTP {exc.code}")
    except urllib.error.URLError as exc:
        return _result(False, False, f"connection failure: {exc}")
    except Exception as exc:
        # e.g. a timeout while reading the body.
        return _result(False, False, str(exc))

    try:
        # Both UnicodeDecodeError and json.JSONDecodeError subclass ValueError.
        payload = json.loads(raw.decode("utf-8"))
    except ValueError as exc:
        return _result(True, False, f"invalid verify response: {exc}")
    if not isinstance(payload, dict):
        return _result(True, False, "invalid verify response: expected a JSON object")

    return _result(True, bool(payload.get("success", False)), "")
def _collect_letsencrypt_hosts(caddy_data_dir: str) -> set[str]:
    """Return the host names that have a ``.crt`` file under a Let's Encrypt issuer dir.

    Walks ``<caddy_data_dir>/caddy/certificates`` and considers only
    directories whose path *below that root* contains ``letsencrypt``. Each
    ``<name>.crt`` file contributes ``<name>`` lower-cased; wildcard
    certificates are returned in ``*.example.com`` form.

    Args:
        caddy_data_dir: Root of the Caddy data directory.

    Returns:
        Set of host names; empty when the certificates directory is missing.
    """
    results: set[str] = set()
    cert_root = os.path.join(caddy_data_dir, "caddy", "certificates")
    if not os.path.isdir(cert_root):
        return results

    # Caddy's storage layer writes "*" as "wildcard_" in file names; the bare
    # "_." form is still accepted for backward compatibility.
    wildcard_prefixes = ("wildcard_.", "_.")

    for root, _dirs, files in os.walk(cert_root):
        # Test the path relative to cert_root so a data directory that merely
        # has "letsencrypt" in its own name does not match every issuer.
        if "letsencrypt" not in os.path.relpath(root, cert_root).lower():
            continue
        for filename in files:
            if not filename.endswith(".crt"):
                continue
            host = filename[:-4].lower()
            for prefix in wildcard_prefixes:
                if host.startswith(prefix):
                    host = "*." + host[len(prefix):]
                    break
            if host.startswith("*."):
                # A bare "*." carries no domain and would match nothing useful.
                if len(host) > 2:
                    results.add(host)
            elif host and host != "*" and HOST_RE.match(host):
                # HOST_RE is the host-name pattern defined elsewhere in this module.
                results.add(host)
    return results
def _has_matching_le_cert(route_fqdn: str, cert_hosts: set[str]) -> bool:
    """Report whether *route_fqdn* is covered by one of *cert_hosts*.

    A host is covered by an entry with the identical name, or by a wildcard
    entry ``*.<parent>`` where ``<parent>`` is *route_fqdn* minus its
    left-most label. A wildcard stands for exactly one label, so
    ``*.example.com`` covers ``a.example.com`` but neither ``example.com``
    nor ``a.b.example.com``.

    Args:
        route_fqdn: Host name of the route being checked.
        cert_hosts: Certificate host names, wildcards in ``*.domain`` form.

    Returns:
        ``True`` when a matching certificate host exists.
    """
    if route_fqdn in cert_hosts:
        return True

    first_label, dot, parent = route_fqdn.partition(".")
    if not (first_label and dot and parent):
        # Single-label or malformed names cannot sit under a wildcard.
        return False
    return f"*.{parent}" in cert_hosts
def _build_status_payload(state: dict) -> bytes:
    """Serialize *state* to compact UTF-8 JSON with keys in sorted order."""
    compact_separators = (",", ":")  # no whitespace between tokens
    return json.dumps(state, sort_keys=True, separators=compact_separators).encode("utf-8")
def _start_status_server(bind_addr: str, snapshot: dict, lock: threading.Lock):
    """Start the background HTTP server that exposes the status snapshot.

    Two GET routes are served: ``/healthz`` answers ``ok`` as plain text and
    ``/status.json`` answers the JSON form of *snapshot*. Any other path gets
    a 404. Per-request access logging is silenced.

    Args:
        bind_addr: ``host:port`` string, parsed by ``_parse_bind_addr``.
        snapshot: Status dict shared with the caller; it is serialized while
            holding *lock* on every ``/status.json`` request.
        lock: Lock guarding *snapshot*.

    Returns:
        ``(server, thread)``: the ``ThreadingHTTPServer`` and the daemon
        thread running its ``serve_forever`` loop.

    Raises:
        ValueError: Propagated from ``_parse_bind_addr`` for a bad address.
        OSError: If the socket cannot be bound.
    """
    host, port = _parse_bind_addr(bind_addr)

    class _Handler(BaseHTTPRequestHandler):
        def do_GET(self):
            # self.path is the raw request target and still carries any query
            # string; route on the path component only so that requests such
            # as "/status.json?t=123" (cache busting) are not answered 404.
            route = urllib.parse.urlsplit(self.path).path

            if route == "/healthz":
                body = b"ok"
                self.send_response(200)
                self.send_header("Content-Type", "text/plain; charset=utf-8")
                self.send_header("Content-Length", str(len(body)))
                self.end_headers()
                self.wfile.write(body)
                return

            if route != "/status.json":
                self.send_response(404)
                self.end_headers()
                return

            # Serialize under the lock; write to the socket after releasing it
            # so a slow client cannot stall whoever updates the snapshot.
            with lock:
                payload = _build_status_payload(snapshot)
            self.send_response(200)
            self.send_header("Content-Type", "application/json; charset=utf-8")
            self.send_header("Cache-Control", "no-store")
            self.send_header("Content-Length", str(len(payload)))
            self.end_headers()
            self.wfile.write(payload)

        def log_message(self, fmt, *args):
            # Suppress the default per-request log line written to stderr.
            return

    server = ThreadingHTTPServer((host, port), _Handler)
    # Daemon thread: it must not keep the process alive on exit.
    thread = threading.Thread(target=server.serve_forever, daemon=True)
    thread.start()
    _log(f"INFO: status endpoint listening on {host}:{port}")
    return server, thread
def main():
defaults = _read_simple_yaml(os.getenv("CONFIG_FILE", "/app/config/defaults.yaml"))
@@ -249,6 +427,25 @@ def main():
default_scheme = str(_cfg("DEFAULT_SCHEME", defaults, "default_scheme", "http")).strip().lower()
default_path = str(_cfg("DEFAULT_PATH", defaults, "default_path", "/")).strip() or "/"
default_health_uri = str(_cfg("DEFAULT_HEALTH_URI", defaults, "default_health_uri", "")).strip()
status_bind = str(_cfg("STATUS_BIND", defaults, "status_bind", "0.0.0.0:8089")).strip()
status_ui_port_raw = _cfg("STATUS_UI_PORT", defaults, "status_ui_port", 31820)
status_upstream = str(_cfg("STATUS_UPSTREAM", defaults, "status_upstream", "discovery-agent:8089")).strip()
cf_verify_url = str(
_cfg(
"CF_VERIFY_URL",
defaults,
"cf_verify_url",
"https://api.cloudflare.com/client/v4/user/tokens/verify",
)
).strip()
caddy_data_dir = str(_cfg("CADDY_DATA_DIR", defaults, "caddy_data_dir", "/caddy-data")).strip()
try:
status_ui_port = int(status_ui_port_raw)
if status_ui_port < 1 or status_ui_port > 65535:
raise ValueError
except (TypeError, ValueError):
status_ui_port = 31820
poll_seconds_raw = _cfg("POLL_SECONDS", defaults, "poll_seconds", 15)
try:
@@ -268,12 +465,37 @@ def main():
token = os.getenv("CLOUDFLARE_API_TOKEN", "").strip()
last_digest = ""
snapshot_lock = threading.Lock()
snapshot = {
"app": {
"name": "caddy-autogen",
"status_ui_port": status_ui_port,
"status_upstream": status_upstream,
"require_cloudflare": require_cloudflare,
"allow_internal_tls_fallback": allow_internal_tls_fallback,
},
"last_tick_ts": 0,
"last_apply_ok": False,
"last_apply_http_status": 0,
"last_error": "not started",
"routes": [],
"cloudflare": {
"reachable": False,
"token_valid": False,
"last_check_ts": 0,
"error": "not checked",
},
"certs": [],
}
_start_status_server(status_bind, snapshot, snapshot_lock)
_log(
"INFO: starting caddy-autogen discovery-agent "
f"(docker_api_url={docker_api_url}, caddy_load_url={caddy_load_url}, poll_seconds={poll_seconds})"
)
while True:
tick_ts = int(time.time())
try:
routes = _collect_routes(
docker_api_url=docker_api_url,
@@ -284,6 +506,16 @@ def main():
default_path=default_path,
default_health_uri=default_health_uri,
)
cloudflare_status = _verify_cloudflare_token(cf_verify_url, token)
cert_hosts = _collect_letsencrypt_hosts(caddy_data_dir)
cert_rows = [
{
"fqdn": route["fqdn"],
"letsencrypt_present": _has_matching_le_cert(route["fqdn"], cert_hosts),
}
for route in routes
]
caddyfile = _generate_caddyfile(
routes=routes,
token=token,
@@ -291,21 +523,53 @@ def main():
allow_internal_tls_fallback=allow_internal_tls_fallback,
wildcard_domain=wildcard_domain,
cert_email=cert_email,
status_ui_port=status_ui_port,
status_upstream=status_upstream,
)
digest = hashlib.sha256(caddyfile.encode("utf-8")).hexdigest()
apply_ok = True
apply_status = 0
last_error = ""
if digest != last_digest:
status = _post_caddyfile(caddy_load_url, caddyfile)
_log(f"INFO: applied config (routes={len(routes)}, status={status})")
apply_status = _post_caddyfile(caddy_load_url, caddyfile)
_log(f"INFO: applied config (routes={len(routes)}, status={apply_status})")
last_digest = digest
else:
_log(f"INFO: no config changes (routes={len(routes)})")
with snapshot_lock:
snapshot["last_tick_ts"] = tick_ts
snapshot["last_apply_ok"] = apply_ok
snapshot["last_apply_http_status"] = apply_status
snapshot["last_error"] = last_error
snapshot["routes"] = routes
snapshot["cloudflare"] = cloudflare_status
snapshot["certs"] = cert_rows
except urllib.error.HTTPError as e:
_log(f"ERROR: http failure {e.code} {e.reason}")
err = f"http failure {e.code} {e.reason}"
_log(f"ERROR: {err}")
with snapshot_lock:
snapshot["last_tick_ts"] = tick_ts
snapshot["last_apply_ok"] = False
snapshot["last_apply_http_status"] = 0
snapshot["last_error"] = err
except urllib.error.URLError as e:
_log(f"ERROR: connection failure: {e}")
err = f"connection failure: {e}"
_log(f"ERROR: {err}")
with snapshot_lock:
snapshot["last_tick_ts"] = tick_ts
snapshot["last_apply_ok"] = False
snapshot["last_apply_http_status"] = 0
snapshot["last_error"] = err
except Exception as e:
_log(f"ERROR: {e}")
err = str(e)
_log(f"ERROR: {err}")
with snapshot_lock:
snapshot["last_tick_ts"] = tick_ts
snapshot["last_apply_ok"] = False
snapshot["last_apply_http_status"] = 0
snapshot["last_error"] = err
time.sleep(poll_seconds)