Files
2026-03-23 12:47:30 +01:00

583 lines
20 KiB
Python

#!/usr/bin/env python3
import hashlib
import json
import os
import re
import sys
import threading
import time
import urllib.error
import urllib.parse
import urllib.request
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
# Lower-cased strings that _to_bool() accepts as "true"; anything else is false.
TRUE_VALUES = {"1", "true", "yes", "on"}
# Accepted shape of a generated hostname: lower-case letters, digits, dots and hyphens only.
HOST_RE = re.compile(r"^[a-z0-9.-]+$")
def _to_bool(value: str, default: bool = False) -> bool:
    """Interpret a textual flag as a boolean.

    Returns ``default`` when ``value`` is ``None``. Otherwise the text is
    trimmed and lower-cased, and the result is ``True`` only if it appears
    in ``TRUE_VALUES``.
    """
    if value is not None:
        normalized = value.strip().lower()
        return normalized in TRUE_VALUES
    return default
def _parse_simple_yaml_scalar(text: str):
    """Convert one already-trimmed scalar to ``str``, ``bool`` or ``int``."""
    # A quoted scalar needs an opening AND a closing quote of the same kind;
    # a lone quote character is left untouched instead of becoming "".
    if len(text) >= 2 and text[0] == text[-1] and text[0] in ('"', "'"):
        return text[1:-1]
    lowered = text.lower()
    if lowered in {"true", "false"}:
        return lowered == "true"
    try:
        return int(text)
    except ValueError:
        # Not a number: keep the raw text.
        return text


def _read_simple_yaml(path: str) -> dict:
    """Parse a flat ``key: value`` file into a dict.

    Only a small YAML subset is understood: one mapping entry per line,
    split on the first ``:``. Blank lines, lines starting with ``#`` and
    lines without a ``:`` are ignored. Values wrapped in matching double or
    single quotes are returned as strings without the quotes; unquoted
    ``true``/``false`` (any case) become booleans; integer text becomes
    ``int``; everything else stays a string.

    Args:
        path: Location of the file to read (UTF-8).

    Returns:
        The parsed mapping. A missing file yields an empty dict so that the
        caller can fall back to its built-in defaults.
    """
    data = {}
    try:
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                stripped = line.strip()
                if not stripped or stripped.startswith("#") or ":" not in stripped:
                    continue
                # Split on the first colon only so values such as URLs survive.
                key, _sep, raw_val = stripped.partition(":")
                data[key.strip()] = _parse_simple_yaml_scalar(raw_val.strip())
    except FileNotFoundError:
        # The defaults file is optional; absence is not an error.
        pass
    return data
def _cfg(name: str, defaults: dict, key: str, fallback=""):
    """Resolve one setting, preferring the environment over the defaults file.

    The environment variable ``name`` wins when it is set to a non-empty
    string; otherwise ``defaults[key]`` is returned, or ``fallback`` when
    the key is absent.
    """
    env_value = os.getenv(name)
    if env_value is None or env_value == "":
        return defaults.get(key, fallback)
    return env_value
def _log(msg: str):
    """Print ``msg`` on standard output and flush right away."""
    print(msg, flush=True)
def _get_json(url: str):
    """GET ``url`` (10 second timeout) and return the decoded JSON body.

    Errors raised by ``urllib`` or by JSON decoding propagate to the caller.
    """
    request = urllib.request.Request(url, headers={"Accept": "application/json"})
    with urllib.request.urlopen(request, timeout=10) as response:
        raw_body = response.read()
    return json.loads(raw_body.decode("utf-8"))
def _post_caddyfile(url: str, caddyfile: str):
    """POST ``caddyfile`` as ``text/caddyfile`` to ``url`` and return the HTTP status.

    Uses a 15 second timeout; errors raised by ``urllib`` propagate.
    """
    body = caddyfile.encode("utf-8")
    request = urllib.request.Request(
        url,
        data=body,
        method="POST",
        headers={"Content-Type": "text/caddyfile"},
    )
    with urllib.request.urlopen(request, timeout=15) as response:
        status_code = response.status
    return status_code
def _env_map(inspect_obj: dict) -> dict:
    """Turn the ``Config.Env`` list of an inspect document into a dict.

    Entries are split on their first ``=``; entries without ``=`` are
    dropped, and a repeated name keeps its last value. Missing or null
    ``Config`` / ``Env`` yields an empty dict.
    """
    config = inspect_obj.get("Config") or {}
    entries = config.get("Env") or []
    pairs = (entry.split("=", 1) for entry in entries if "=" in entry)
    return {name: value for name, value in pairs}
def _published_tcp_ports(inspect_obj: dict) -> dict:
    """Map container TCP port numbers to the host port they are published on.

    Reads ``NetworkSettings.Ports`` of an inspect document. Only ``/tcp``
    entries with at least one binding are considered, and only the first
    binding's ``HostPort`` is used. Keys and values are strings.
    """
    bindings_by_port = (inspect_obj.get("NetworkSettings") or {}).get("Ports") or {}
    published = {}
    for port_spec, bindings in bindings_by_port.items():
        if not (port_spec.endswith("/tcp") and bindings):
            continue
        first_host_port = bindings[0].get("HostPort")
        if first_host_port:
            published[port_spec.split("/", 1)[0]] = first_host_port
    return published
def _normalize_name(raw_name: str) -> str:
    """Reduce a container name to a lower-case, hyphen-separated slug.

    Surrounding whitespace and leading slashes are removed, every character
    outside ``a-z0-9-`` becomes a hyphen, runs of hyphens collapse to one and
    hyphens at either end are trimmed. An empty result is replaced by ``"app"``.
    """
    lowered = raw_name.strip().lstrip("/").lower()
    hyphenated = re.sub(r"[^a-z0-9-]", "-", lowered)
    collapsed = re.sub(r"-+", "-", hyphenated)
    slug = collapsed.strip("-")
    return slug if slug else "app"
def _build_fqdn(host_hint: str, base_domain: str) -> str:
    """Derive a lower-case host name from a hint and an optional base domain.

    A hint that already contains a dot is used as-is (lower-cased). A bare
    label is suffixed with ``base_domain``; without a base domain the result
    is the empty string.
    """
    hint = host_hint.lower()
    if "." in host_hint:
        return hint
    if not base_domain:
        return ""
    return f"{hint}.{base_domain.lower()}"
# Characters that split or re-quote a Caddyfile token when a value is
# interpolated verbatim: any whitespace, double quotes and backticks.
_CADDY_UNSAFE_TOKEN_RE = re.compile(r"[\s\"`]")


def _route_from_inspect(
    primary_name,
    inspect_obj,
    env_prefix,
    base_domain,
    default_scheme,
    default_path,
    default_health_uri,
):
    """Build the route dict for one inspected container, or return ``None`` to skip it."""
    envs = _env_map(inspect_obj)
    if not _to_bool(envs.get(f"{env_prefix}ENABLE", "false")):
        return None

    target_port = envs.get(f"{env_prefix}TARGET_PORT", "").strip()
    if not target_port.isdigit():
        _log(f"WARN: skip {primary_name}: invalid TARGET_PORT='{target_port}'")
        return None

    host_port = _published_tcp_ports(inspect_obj).get(target_port)
    if not host_port:
        _log(f"WARN: skip {primary_name}: target port {target_port} is not published as TCP")
        return None

    host_hint = envs.get(f"{env_prefix}HOST", "").strip() or primary_name
    fqdn = _build_fqdn(host_hint, base_domain)
    if not fqdn or not HOST_RE.match(fqdn):
        _log(f"WARN: skip {primary_name}: invalid fqdn '{fqdn}'")
        return None

    scheme = (envs.get(f"{env_prefix}SCHEME", "") or default_scheme).strip().lower()
    if scheme not in {"http", "https"}:
        _log(f"WARN: skip {primary_name}: invalid scheme '{scheme}'")
        return None

    path = (envs.get(f"{env_prefix}PATH", "") or default_path).strip() or "/"
    if not path.startswith("/"):
        path = "/" + path
    health_uri = (envs.get(f"{env_prefix}HEALTH_URI", "") or default_health_uri).strip()

    # Both values are written verbatim into the generated Caddyfile. A value
    # that breaks tokenisation would make the whole config fail to load, so a
    # single bad container is skipped instead of taking every route down.
    if _CADDY_UNSAFE_TOKEN_RE.search(path):
        _log(f"WARN: skip {primary_name}: unsafe path {path!r}")
        return None
    if _CADDY_UNSAFE_TOKEN_RE.search(health_uri):
        _log(f"WARN: skip {primary_name}: unsafe health_uri {health_uri!r}")
        return None

    return {
        "name": primary_name,
        "fqdn": fqdn,
        "scheme": scheme,
        "upstream": f"host.docker.internal:{host_port}",
        "path": path,
        "health_uri": health_uri,
    }


def _collect_routes(
    docker_api_url: str,
    env_prefix: str,
    denylist: set,
    base_domain: str,
    default_scheme: str,
    default_path: str,
    default_health_uri: str,
):
    """Discover proxy routes from the running containers.

    Lists running containers through the Docker API, inspects each one that
    is not on the denylist and reads its routing settings from environment
    variables named ``{env_prefix}ENABLE``, ``TARGET_PORT``, ``HOST``,
    ``SCHEME``, ``PATH`` and ``HEALTH_URI``. A container is skipped (with a
    warning) when a setting is invalid, when the target port is not published
    as TCP, or when its host name is already claimed by an earlier container
    in the listing.

    Args:
        docker_api_url: Base URL of the Docker API.
        env_prefix: Prefix of the per-container environment variables.
        denylist: Normalised container names that are never routed.
        base_domain: Domain appended to host hints that contain no dot.
        default_scheme: Upstream scheme used when the container sets none.
        default_path: Path prefix used when the container sets none.
        default_health_uri: Health-check URI used when the container sets none.

    Returns:
        A list of route dicts with the keys ``name``, ``fqdn``, ``scheme``,
        ``upstream``, ``path`` and ``health_uri``, sorted by ``fqdn``.

    Raises:
        urllib.error.URLError: When the Docker API cannot be reached, or
            answers with an HTTP error other than 404 on an inspect call.
    """
    routes = []
    owner_by_fqdn = {}
    containers = _get_json(f"{docker_api_url}/containers/json?all=0")
    for c in containers:
        cid = c.get("Id")
        names = c.get("Names") or []
        # "Image" may be present but null; fall back to a fixed label then.
        primary_name = _normalize_name(names[0] if names else (c.get("Image") or "container"))
        if primary_name in denylist:
            continue
        if not cid:
            _log(f"WARN: skip {primary_name}: container listing entry has no Id")
            continue

        try:
            inspect_obj = _get_json(f"{docker_api_url}/containers/{cid}/json")
        except urllib.error.HTTPError as exc:
            # A container can exit between the list and the inspect call;
            # that must not abort discovery for all the others.
            if exc.code != 404:
                raise
            _log(f"WARN: skip {primary_name}: container disappeared before inspect")
            continue

        route = _route_from_inspect(
            primary_name,
            inspect_obj,
            env_prefix,
            base_domain,
            default_scheme,
            default_path,
            default_health_uri,
        )
        if route is None:
            continue

        # Two site blocks with the same address make Caddy reject the whole
        # config, so only the first container per host name is kept.
        previous_owner = owner_by_fqdn.get(route["fqdn"])
        if previous_owner is not None:
            _log(
                f"WARN: skip {primary_name}: fqdn '{route['fqdn']}' "
                f"is already routed to {previous_owner}"
            )
            continue
        owner_by_fqdn[route["fqdn"]] = primary_name
        routes.append(route)

    routes.sort(key=lambda r: r["fqdn"])
    return routes
def _append_status_site(out: list[str], status_ui_port: int, status_upstream: str) -> None:
    """Append the status-UI site block to ``out`` in place.

    The block listens on ``status_ui_port``, serves ``/srv/status`` to
    clients matched by ``remote_ip private_ranges``, proxies
    ``/status.json`` to ``status_upstream`` and answers 403 otherwise.
    A trailing empty line is appended after the block.
    """
    out.extend(
        [
            f":{status_ui_port} {{",
            " @allowed remote_ip private_ranges",
            " handle @allowed {",
            " @status_json path /status.json",
            " handle @status_json {",
            f" reverse_proxy {status_upstream}",
            " }",
            " root * /srv/status",
            " file_server",
            " }",
            " respond \"forbidden\" 403",
            "}",
            "",
        ]
    )
def _generate_caddyfile(
    routes,
    token: str,
    require_cloudflare: bool,
    allow_internal_tls_fallback: bool,
    wildcard_domain: str,
    cert_email: str,
    status_ui_port: int,
    status_upstream: str,
):
    """Render the complete Caddyfile text for the given routes.

    The output consists of a global options block, the status-UI site, an
    optional wildcard "anchor" site (only when both ``wildcard_domain`` and
    ``token`` are set) and one site per route. Without routes a placeholder
    ``:80`` site is emitted instead.

    Raises:
        RuntimeError: When ``require_cloudflare`` is set but ``token`` is empty.
    """
    if require_cloudflare and not token:
        raise RuntimeError("CLOUDFLARE_API_TOKEN is required in fail-closed mode")

    # Per-site TLS stanza used whenever a Cloudflare token is available.
    dns_tls_lines = [
        " tls {",
        " dns cloudflare {env.CLOUDFLARE_API_TOKEN}",
        " }",
    ]

    def proxy_lines(route):
        """Return the reverse_proxy directive, with a health check block if configured."""
        directive = f" reverse_proxy {route['scheme']}://{route['upstream']}"
        if not route["health_uri"]:
            return [directive]
        return [
            f"{directive} {{",
            f" health_uri {route['health_uri']}",
            " }",
        ]

    # Global options block.
    lines = ["{"]
    if cert_email:
        lines.append(f" email {cert_email}")
    if token:
        lines.append(" acme_dns cloudflare {env.CLOUDFLARE_API_TOKEN}")
    elif allow_internal_tls_fallback:
        lines.append(" local_certs")
    lines += ["}", ""]

    _append_status_site(lines, status_ui_port=status_ui_port, status_upstream=status_upstream)

    if wildcard_domain and token:
        lines.append(f"{wildcard_domain}, *.{wildcard_domain} {{")
        lines.extend(dns_tls_lines)
        lines.append(" respond \"wildcard certificate anchor\" 204")
        lines += ["}", ""]

    if not routes:
        lines += [
            ":80 {",
            " respond \"no eligible containers for caddy-autogen\" 200",
            "}",
        ]
        return "\n".join(lines) + "\n"

    for route in routes:
        lines.append(f"{route['fqdn']} {{")
        if token:
            lines.extend(dns_tls_lines)
        elif allow_internal_tls_fallback:
            lines.append(" tls internal")

        if route["path"] == "/":
            lines.extend(proxy_lines(route))
        else:
            # Only the configured path prefix is proxied; the rest is a 404.
            lines.append(" @allowed path " + route["path"] + "*")
            lines.append(" handle @allowed {")
            lines.extend(proxy_lines(route))
            lines.append(" }")
            lines.append(" respond \"not found\" 404")
        lines += ["}", ""]

    return "\n".join(lines)
def _parse_bind_addr(value: str, default_host: str = "0.0.0.0", default_port: int = 8089) -> tuple[str, int]:
    """Split a ``host:port`` string into a ``(host, port)`` tuple.

    An empty or ``None`` value yields the defaults. The split happens at the
    last colon; an empty host part falls back to ``default_host``.

    Raises:
        ValueError: When there is no colon, the port is not an integer, or
            the port lies outside 1-65535.
    """
    raw = str(value or "").strip()
    if not raw:
        return default_host, default_port

    host, colon, port_raw = raw.rpartition(":")
    if not colon:
        raise ValueError(f"invalid bind address '{raw}', expected host:port")

    try:
        port = int(port_raw)
    except ValueError as exc:
        raise ValueError(f"invalid port in bind address '{raw}'") from exc
    if not 1 <= port <= 65535:
        raise ValueError(f"port out of range in bind address '{raw}'")

    return (host or default_host), port
def _verify_cloudflare_token(verify_url: str, token: str) -> dict:
    """Check the Cloudflare API token against ``verify_url``.

    Returns a dict with the keys ``reachable``, ``token_valid``,
    ``last_check_ts`` (epoch seconds taken at call time) and ``error``.
    No request is made when ``token`` is empty. HTTP error responses count
    as reachable-but-invalid; any other failure counts as unreachable.
    """
    checked_at = int(time.time())

    def outcome(reachable, token_valid, error):
        """Assemble the result dict for this check."""
        return {
            "reachable": reachable,
            "token_valid": token_valid,
            "last_check_ts": checked_at,
            "error": error,
        }

    if not token:
        return outcome(False, False, "CLOUDFLARE_API_TOKEN is missing")

    request = urllib.request.Request(
        verify_url,
        headers={
            "Authorization": f"Bearer {token}",
            "Accept": "application/json",
        },
    )
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            payload = json.loads(resp.read().decode("utf-8"))
        return outcome(True, bool(payload.get("success", False)), "")
    except urllib.error.HTTPError as exc:
        # Must precede URLError: HTTPError is a subclass of it.
        return outcome(True, False, f"HTTP {exc.code}")
    except urllib.error.URLError as exc:
        return outcome(False, False, f"connection failure: {exc}")
    except Exception as exc:
        return outcome(False, False, str(exc))
def _collect_letsencrypt_hosts(caddy_data_dir: str) -> set[str]:
    """List the host names that have a Let's Encrypt certificate on disk.

    Walks ``<caddy_data_dir>/caddy/certificates`` and considers every
    ``*.crt`` file located below a directory whose name contains
    ``letsencrypt``. The file name without its extension is taken as the
    host name.

    Wildcard certificates are recognised by their on-disk prefix:
    ``wildcard_.`` (the ``*`` replacement used by Caddy's certificate
    storage) and the legacy ``_.`` form. Both are reported as ``*.<domain>``.

    Args:
        caddy_data_dir: Root of Caddy's data directory.

    Returns:
        Lower-case host names and ``*.<domain>`` wildcard entries; empty when
        the certificate directory does not exist.
    """
    results: set[str] = set()
    cert_root = os.path.join(caddy_data_dir, "caddy", "certificates")
    if not os.path.isdir(cert_root):
        return results

    wildcard_prefixes = ("wildcard_.", "_.")
    for root, _dirs, files in os.walk(cert_root):
        # Judge the issuer by the part of the path below cert_root only, so a
        # data directory that itself contains "letsencrypt" in its name does
        # not make every issuer match.
        if "letsencrypt" not in root[len(cert_root):].lower():
            continue
        for filename in files:
            if not filename.endswith(".crt"):
                continue
            host = filename[:-4].lower()
            for prefix in wildcard_prefixes:
                if host.startswith(prefix):
                    host = "*." + host[len(prefix):]
                    break
            if host.startswith("*."):
                # Require a non-empty domain after the wildcard label.
                if len(host) > 2:
                    results.add(host)
            elif host and host != "*" and HOST_RE.match(host):
                results.add(host)
    return results
def _has_matching_le_cert(route_fqdn: str, cert_hosts: set[str]) -> bool:
    """Tell whether a certificate in ``cert_hosts`` covers ``route_fqdn``.

    A host is covered by a certificate for exactly that name, or by a
    wildcard entry ``*.<parent>`` where ``<parent>`` is the host name with
    its left-most label removed. A wildcard stands for one label only, so
    ``*.example.com`` covers ``a.example.com`` but neither ``example.com``
    nor ``a.b.example.com``.

    Args:
        route_fqdn: Host name of the route.
        cert_hosts: Exact host names and ``*.<domain>`` wildcard entries.

    Returns:
        ``True`` when a matching certificate entry exists.
    """
    if route_fqdn in cert_hosts:
        return True
    first_label, dot, parent = route_fqdn.partition(".")
    if not (first_label and dot and parent):
        # No parent domain, hence nothing a wildcard could cover.
        return False
    return f"*.{parent}" in cert_hosts
def _build_status_payload(state: dict) -> bytes:
    """Serialise ``state`` as compact, key-sorted JSON encoded in UTF-8."""
    text = json.dumps(state, sort_keys=True, separators=(",", ":"))
    return text.encode("utf-8")
def _start_status_server(bind_addr: str, snapshot: dict, lock: threading.Lock):
    """Start the status HTTP endpoint on a daemon thread.

    Two GET routes are served: ``/healthz`` answers ``ok`` and
    ``/status.json`` answers the JSON form of ``snapshot``, serialised while
    holding ``lock``. Any other path gets a 404. A query string on the
    request target is ignored when choosing the route.

    Args:
        bind_addr: ``host:port`` to listen on (parsed by ``_parse_bind_addr``).
        snapshot: Shared state dict; read on every ``/status.json`` request.
        lock: Lock guarding ``snapshot``.

    Returns:
        The ``(server, thread)`` pair.

    Raises:
        ValueError: When ``bind_addr`` is malformed.
        OSError: When the address cannot be bound.
    """
    host, port = _parse_bind_addr(bind_addr)

    class _Handler(BaseHTTPRequestHandler):
        def do_GET(self):
            # self.path is the raw request target; drop any query string so
            # that e.g. "/status.json?ts=1" still reaches the JSON route.
            route = self.path.split("?", 1)[0]
            if route not in {"/status.json", "/healthz"}:
                self.send_response(404)
                self.end_headers()
                return
            if route == "/healthz":
                body = b"ok"
                self.send_response(200)
                self.send_header("Content-Type", "text/plain; charset=utf-8")
                self.send_header("Content-Length", str(len(body)))
                self.end_headers()
                self.wfile.write(body)
                return
            # Serialise under the lock, write to the socket outside of it.
            with lock:
                payload = _build_status_payload(snapshot)
            self.send_response(200)
            self.send_header("Content-Type", "application/json; charset=utf-8")
            self.send_header("Cache-Control", "no-store")
            self.send_header("Content-Length", str(len(payload)))
            self.end_headers()
            self.wfile.write(payload)

        def log_message(self, fmt, *args):
            # Silence the default per-request logging.
            return

    server = ThreadingHTTPServer((host, port), _Handler)
    thread = threading.Thread(target=server.serve_forever, daemon=True)
    thread.start()
    _log(f"INFO: status endpoint listening on {host}:{port}")
    return server, thread
def main():
    """Run the discovery agent: poll Docker, regenerate and apply the Caddyfile.

    Settings come from environment variables, falling back to the file named
    by ``CONFIG_FILE`` (default ``/app/config/defaults.yaml``) and then to
    built-in defaults. After starting the status endpoint the function loops
    forever: each tick collects routes, checks the Cloudflare token, looks
    for existing Let's Encrypt certificates, renders the Caddyfile and posts
    it to Caddy only when its SHA-256 digest differs from the last one that
    was applied successfully. Every tick updates the shared status snapshot;
    failures are logged and recorded there, and the loop carries on.
    """
    defaults = _read_simple_yaml(os.getenv("CONFIG_FILE", "/app/config/defaults.yaml"))
    docker_api_url = _cfg("DOCKER_API_URL", defaults, "docker_api_url", "http://socket-proxy:2375")
    caddy_load_url = _cfg("CADDY_LOAD_URL", defaults, "caddy_load_url", "http://caddy:2019/load")
    base_domain = str(_cfg("BASE_DOMAIN", defaults, "base_domain", "")).strip()
    wildcard_domain = str(_cfg("WILDCARD_DOMAIN", defaults, "wildcard_domain", "")).strip()
    cert_email = str(_cfg("CERT_EMAIL", defaults, "cert_email", "")).strip()
    env_prefix = str(_cfg("ENV_PREFIX", defaults, "env_prefix", "LABEL_CADDY_")).strip()
    default_scheme = str(_cfg("DEFAULT_SCHEME", defaults, "default_scheme", "http")).strip().lower()
    default_path = str(_cfg("DEFAULT_PATH", defaults, "default_path", "/")).strip() or "/"
    default_health_uri = str(_cfg("DEFAULT_HEALTH_URI", defaults, "default_health_uri", "")).strip()
    status_bind = str(_cfg("STATUS_BIND", defaults, "status_bind", "0.0.0.0:8089")).strip()
    status_ui_port_raw = _cfg("STATUS_UI_PORT", defaults, "status_ui_port", 31820)
    status_upstream = str(_cfg("STATUS_UPSTREAM", defaults, "status_upstream", "discovery-agent:8089")).strip()
    cf_verify_url = str(
        _cfg(
            "CF_VERIFY_URL",
            defaults,
            "cf_verify_url",
            "https://api.cloudflare.com/client/v4/user/tokens/verify",
        )
    ).strip()
    caddy_data_dir = str(_cfg("CADDY_DATA_DIR", defaults, "caddy_data_dir", "/caddy-data")).strip()

    # An unparsable or out-of-range port falls back to the default.
    try:
        status_ui_port = int(status_ui_port_raw)
        if status_ui_port < 1 or status_ui_port > 65535:
            raise ValueError
    except (TypeError, ValueError):
        status_ui_port = 31820

    # Never poll more often than every 5 seconds.
    poll_seconds_raw = _cfg("POLL_SECONDS", defaults, "poll_seconds", 15)
    try:
        poll_seconds = max(5, int(poll_seconds_raw))
    except (TypeError, ValueError):
        poll_seconds = 15

    denylist_raw = str(_cfg("CONTAINER_NAME_DENYLIST", defaults, "container_name_denylist", ""))
    # Container names are compared after _normalize_name(), so the entries
    # must be normalised the same way; otherwise names such as "my_app" or
    # "/caddy" could never match.
    denylist = {_normalize_name(item) for item in denylist_raw.split(",") if item.strip()}

    require_cloudflare = _to_bool(str(_cfg("REQUIRE_CLOUDFLARE", defaults, "require_cloudflare", "true")), True)
    allow_internal_tls_fallback = _to_bool(
        str(_cfg("ALLOW_INTERNAL_TLS_FALLBACK", defaults, "allow_internal_tls_fallback", "false")),
        False,
    )
    token = os.getenv("CLOUDFLARE_API_TOKEN", "").strip()

    last_digest = ""
    snapshot_lock = threading.Lock()
    snapshot = {
        "app": {
            "name": "caddy-autogen",
            "status_ui_port": status_ui_port,
            "status_upstream": status_upstream,
            "require_cloudflare": require_cloudflare,
            "allow_internal_tls_fallback": allow_internal_tls_fallback,
        },
        "last_tick_ts": 0,
        "last_apply_ok": False,
        "last_apply_http_status": 0,
        "last_error": "not started",
        "routes": [],
        "cloudflare": {
            "reachable": False,
            "token_valid": False,
            "last_check_ts": 0,
            "error": "not checked",
        },
        "certs": [],
    }

    def record_failure(tick_ts, err):
        """Log a failed tick and publish it in the status snapshot."""
        _log(f"ERROR: {err}")
        with snapshot_lock:
            snapshot["last_tick_ts"] = tick_ts
            snapshot["last_apply_ok"] = False
            snapshot["last_apply_http_status"] = 0
            snapshot["last_error"] = err

    _start_status_server(status_bind, snapshot, snapshot_lock)
    _log(
        "INFO: starting caddy-autogen discovery-agent "
        f"(docker_api_url={docker_api_url}, caddy_load_url={caddy_load_url}, poll_seconds={poll_seconds})"
    )

    while True:
        tick_ts = int(time.time())
        try:
            routes = _collect_routes(
                docker_api_url=docker_api_url,
                env_prefix=env_prefix,
                denylist=denylist,
                base_domain=base_domain,
                default_scheme=default_scheme,
                default_path=default_path,
                default_health_uri=default_health_uri,
            )
            cloudflare_status = _verify_cloudflare_token(cf_verify_url, token)
            cert_hosts = _collect_letsencrypt_hosts(caddy_data_dir)
            cert_rows = [
                {
                    "fqdn": route["fqdn"],
                    "letsencrypt_present": _has_matching_le_cert(route["fqdn"], cert_hosts),
                }
                for route in routes
            ]
            caddyfile = _generate_caddyfile(
                routes=routes,
                token=token,
                require_cloudflare=require_cloudflare,
                allow_internal_tls_fallback=allow_internal_tls_fallback,
                wildcard_domain=wildcard_domain,
                cert_email=cert_email,
                status_ui_port=status_ui_port,
                status_upstream=status_upstream,
            )
            digest = hashlib.sha256(caddyfile.encode("utf-8")).hexdigest()
            apply_ok = True
            apply_status = 0
            last_error = ""
            if digest != last_digest:
                apply_status = _post_caddyfile(caddy_load_url, caddyfile)
                _log(f"INFO: applied config (routes={len(routes)}, status={apply_status})")
                # Remember the digest only after a successful POST so that a
                # failed apply is retried on the next tick.
                last_digest = digest
            else:
                _log(f"INFO: no config changes (routes={len(routes)})")
            with snapshot_lock:
                snapshot["last_tick_ts"] = tick_ts
                snapshot["last_apply_ok"] = apply_ok
                snapshot["last_apply_http_status"] = apply_status
                snapshot["last_error"] = last_error
                snapshot["routes"] = routes
                snapshot["cloudflare"] = cloudflare_status
                snapshot["certs"] = cert_rows
        except urllib.error.HTTPError as e:
            # Must precede URLError: HTTPError is a subclass of it.
            record_failure(tick_ts, f"http failure {e.code} {e.reason}")
        except urllib.error.URLError as e:
            record_failure(tick_ts, f"connection failure: {e}")
        except Exception as e:
            # Top-level boundary: one bad tick must not stop the agent.
            record_failure(tick_ts, str(e))
        time.sleep(poll_seconds)
if __name__ == "__main__":
    # Script entry point: run the agent until interrupted, then exit with
    # status 0 after logging the shutdown request.
    try:
        main()
    except KeyboardInterrupt:
        _log("INFO: shutdown requested")
        sys.exit(0)