#!/usr/bin/env bash
#
# Discover the appstore ZIP repositories configured on a CasaOS/ZimaOS host
# and write an inventory of those repositories and their apps to a YAML file.
#
# Steps:
#   1. Unless --skip-host-scan is given, SSH to the host and grep a fixed list
#      of config directories for http(s) URLs ending in ".zip".
#   2. Merge the discovered URLs with any --repo-url values, download each ZIP
#      and collect every Apps/<app_id>/docker-compose.yml|yaml it contains.
#   3. Write the repository and app inventory to the --out YAML file.
#
# Requires: ssh, and python3 with the `yaml` module (PyYAML).
# Exit status: 0 when at least one repository validated, 1 on scan/validation
# failure, 2 on usage errors.
set -euo pipefail

HOST=""
USER_NAME="root"
PORT="22"
IDENTITY=""
OUT_PATH="artifacts/zima-repo-app-inventory.yaml"
SSH_TIMEOUT="12"
HTTP_TIMEOUT="20"
MAX_ZIP_BYTES="$((512 * 1024 * 1024))"
EXTRA_REPO_URLS=()
SKIP_HOST_SCAN=0
INSECURE_TLS=0

# Print the help text to stdout.
usage() {
  cat <<USAGE
Usage: ${0##*/} --host <host> [options]

Options:
  --host <host>           ZimaOS host/IP (required)
  --user <user>           SSH user (default: root)
  --port <port>           SSH port (default: 22)
  --identity <path>       SSH private key path
  --out <path>            Output YAML (default: artifacts/zima-repo-app-inventory.yaml)
  --ssh-timeout <sec>     SSH connect timeout (default: 12)
  --http-timeout <sec>    HTTP timeout for repo ZIP fetch (default: 20)
  --max-zip-bytes <n>     Max ZIP size to download per repo (default: 536870912)
  --repo-url <url>        Additional appstore ZIP URL (repeatable)
  --skip-host-scan        Skip SSH host scan and only use --repo-url values
  --insecure-tls          Disable TLS cert verification for ZIP downloads (use only if needed)
  -h, --help              Show this help

Behavior:
  1. SSH to host and scan likely CasaOS/ZimaOS config paths for appstore ZIP URLs.
  2. Validate each URL fail-closed (reachable ZIP + parseable app structure).
  3. Write repository and app inventory to YAML.
USAGE
}

# Abort with a usage error when an option is not followed by a value.
# Arguments: $1 - option name, $2 - number of arguments left (option included).
# Without this check `shift 2` fails under `set -e` and the script dies silently.
require_value() {
  if [[ "$2" -lt 2 ]]; then
    echo "ERROR: $1 requires a value" >&2
    exit 2
  fi
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --host)
      require_value "$1" "$#"
      HOST="$2"
      shift 2
      ;;
    --user)
      require_value "$1" "$#"
      USER_NAME="$2"
      shift 2
      ;;
    --port)
      require_value "$1" "$#"
      PORT="$2"
      shift 2
      ;;
    --identity)
      require_value "$1" "$#"
      IDENTITY="$2"
      shift 2
      ;;
    --out)
      require_value "$1" "$#"
      OUT_PATH="$2"
      shift 2
      ;;
    --ssh-timeout)
      require_value "$1" "$#"
      SSH_TIMEOUT="$2"
      shift 2
      ;;
    --http-timeout)
      require_value "$1" "$#"
      HTTP_TIMEOUT="$2"
      shift 2
      ;;
    --max-zip-bytes)
      require_value "$1" "$#"
      MAX_ZIP_BYTES="$2"
      shift 2
      ;;
    --repo-url)
      require_value "$1" "$#"
      EXTRA_REPO_URLS+=("$2")
      shift 2
      ;;
    --skip-host-scan)
      SKIP_HOST_SCAN=1
      shift
      ;;
    --insecure-tls)
      INSECURE_TLS=1
      shift
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "ERROR: Unknown argument: $1" >&2
      usage >&2
      exit 2
      ;;
  esac
done

if [[ -z "$HOST" ]]; then
  echo "ERROR: --host is required" >&2
  usage >&2
  exit 2
fi

if ! [[ "$PORT" =~ ^[0-9]+$ ]]; then
  echo "ERROR: --port must be numeric" >&2
  exit 2
fi

if ! [[ "$SSH_TIMEOUT" =~ ^[0-9]+$ && "$HTTP_TIMEOUT" =~ ^[0-9]+$ && "$MAX_ZIP_BYTES" =~ ^[0-9]+$ ]]; then
  echo "ERROR: timeout/size values must be numeric" >&2
  exit 2
fi

if [[ -n "$IDENTITY" && ! -f "$IDENTITY" ]]; then
  echo "ERROR: identity file not found: $IDENTITY" >&2
  exit 2
fi

tmp_dir="$(mktemp -d)"

# Remove the scratch directory on every exit path.
cleanup() {
  rm -rf -- "$tmp_dir"
}
trap cleanup EXIT

raw_scan_tsv="$tmp_dir/raw-scan.tsv"
candidate_tsv="$tmp_dir/candidates.tsv"
manual_tsv="$tmp_dir/manual.tsv"

# Run the URL scan on the remote host over SSH.
# Globals:  SSH_TIMEOUT, PORT, IDENTITY, USER_NAME, HOST (read)
# Outputs:  one "file<TAB>line<TAB>url" record per match on stdout
# Returns:  the exit status of ssh (non-zero when the connection or the
#           remote script fails)
run_remote_scan() {
  local -a ssh_cmd
  ssh_cmd=(ssh -o BatchMode=yes -o ConnectTimeout="$SSH_TIMEOUT" -p "$PORT")
  if [[ -n "$IDENTITY" ]]; then
    ssh_cmd+=(-i "$IDENTITY")
  fi
  ssh_cmd+=("${USER_NAME}@${HOST}" "bash -s")

  # The quoted delimiter keeps the script literal; it is fed to the remote
  # `bash -s` on stdin and nothing in it is expanded locally.
  "${ssh_cmd[@]}" <<'REMOTE'
set -euo pipefail

roots=(
  /etc/casaos
  /var/lib/casaos
  /usr/local/etc/casaos
  /etc/zimaos
  /var/lib/zimaos
  /DATA/AppData
  /DATA/.casaos
)

scan_roots=()
for path in "${roots[@]}"; do
  if [[ -d "$path" ]]; then
    scan_roots+=("$path")
  fi
done

if [[ "${#scan_roots[@]}" -eq 0 ]]; then
  exit 0
fi

# grep exits 1 when nothing matches and 2 when some file could not be read.
# Neither means the scan itself failed, so its status is deliberately ignored;
# otherwise pipefail would turn "no URLs found" into a scan failure.
{
  grep -ERHIno --binary-files=without-match \
    --include='*.yaml' --include='*.yml' \
    --include='*.json' --include='*.conf' --include='*.ini' --include='*.txt' \
    'https?://[^"[:space:]]+\.zip(\?[^"[:space:]]*)?' "${scan_roots[@]}" 2>/dev/null \
    || true
} | awk '{
  # Records look like "file:line:url". Split on the first two colons only:
  # the URL itself contains colons, and assigning to $1/$2 under -F: would
  # rebuild $0 with spaces in place of every colon.
  first = index($0, ":")
  if (first == 0) next
  rest = substr($0, first + 1)
  second = index(rest, ":")
  if (second == 0) next
  printf "%s\t%s\t%s\n", substr($0, 1, first - 1), substr(rest, 1, second - 1), substr(rest, second + 1)
}'
REMOTE
}

echo "[1/3] Scanning ZimaOS host for configured appstore repositories..."
if [[ "$SKIP_HOST_SCAN" -eq 1 ]]; then
  echo "INFO: --skip-host-scan enabled; skipping SSH scan." >&2
  : > "$raw_scan_tsv"
else
  if ! run_remote_scan > "$raw_scan_tsv"; then
    if [[ "${#EXTRA_REPO_URLS[@]}" -gt 0 ]]; then
      echo "WARN: SSH host scan failed; continuing with --repo-url entries only." >&2
      : > "$raw_scan_tsv"
    else
      echo "ERROR: SSH host scan failed and no --repo-url fallback provided." >&2
      exit 1
    fi
  fi
fi

# Keep the first evidence line for each distinct URL found on the host.
: > "$candidate_tsv"
if [[ -s "$raw_scan_tsv" ]]; then
  awk -F $'\t' 'NF>=3 && !seen[$3]++ { print $1 "\t" $2 "\t" $3 }' "$raw_scan_tsv" > "$candidate_tsv"
fi

: > "$manual_tsv"
if [[ "${#EXTRA_REPO_URLS[@]}" -gt 0 ]]; then
  for url in "${EXTRA_REPO_URLS[@]}"; do
    [[ -z "$url" ]] && continue
    printf "%s\t%s\t%s\n" "manual-input" "0" "$url" >> "$manual_tsv"
  done
fi

candidate_count=0
manual_count=0
if [[ -s "$candidate_tsv" ]]; then
  candidate_count="$(wc -l < "$candidate_tsv" | tr -d ' ')"
fi
if [[ -s "$manual_tsv" ]]; then
  manual_count="$(wc -l < "$manual_tsv" | tr -d ' ')"
fi

if [[ "$candidate_count" -eq 0 && "$manual_count" -eq 0 ]]; then
  echo "ERROR: No appstore ZIP URLs discovered on host and no --repo-url provided." >&2
  echo "Hint: pass one or more --repo-url if host scan misses your setup." >&2
  exit 1
fi

if [[ "$INSECURE_TLS" -eq 1 ]]; then
  echo "WARN: --insecure-tls enabled; certificate verification is disabled for ZIP downloads." >&2
fi

echo "[2/3] Validating repositories and extracting app inventory..."
mkdir -p "$(dirname "$OUT_PATH")"

# Settings are handed to the embedded Python script through its environment;
# the quoted heredoc delimiter keeps the Python source free of shell expansion.
CANDIDATE_TSV="$candidate_tsv" \
MANUAL_TSV="$manual_tsv" \
OUT_PATH="$OUT_PATH" \
HOST="$HOST" \
HTTP_TIMEOUT="$HTTP_TIMEOUT" \
MAX_ZIP_BYTES="$MAX_ZIP_BYTES" \
INSECURE_TLS="$INSECURE_TLS" \
python3 - <<'PY'
import datetime as dt
import io
import os
import re
import ssl
import sys
import urllib.request
import zipfile
from pathlib import PurePosixPath

import yaml

candidate_tsv = os.environ["CANDIDATE_TSV"]
manual_tsv = os.environ["MANUAL_TSV"]
out_path = os.environ["OUT_PATH"]
host = os.environ["HOST"]
http_timeout = int(os.environ["HTTP_TIMEOUT"])
max_zip_bytes = int(os.environ["MAX_ZIP_BYTES"])
insecure_tls = os.environ["INSECURE_TLS"] == "1"


def normalize_image_ref(ref: str) -> str:
    """Return the image reference lowercased, without quotes, digest or tag.

    Only a colon in the last path segment is treated as a tag separator, so a
    registry port such as "host:5000/img:1.0" is preserved ("host:5000/img").
    """
    value = (ref or "").strip().strip('"\'')
    if not value:
        return ""
    if "@" in value:
        value = value.split("@", 1)[0]
    if "/" in value:
        prefix, tail = value.rsplit("/", 1)
        if ":" in tail:
            tail = tail.split(":", 1)[0]
        value = f"{prefix}/{tail}"
    else:
        if ":" in value:
            value = value.split(":", 1)[0]
    return value.lower()


def read_sources(path: str, source_type: str):
    """Read "file<TAB>line<TAB>url" records from path.

    Returns a list of {"url", "source"} dicts; a missing file yields an empty
    list, and blank lines or records with fewer than three fields are skipped.
    """
    results = []
    if not os.path.exists(path):
        return results
    with open(path, "r", encoding="utf-8") as handle:
        for line in handle:
            line = line.rstrip("\n")
            if not line:
                continue
            parts = line.split("\t")
            if len(parts) < 3:
                continue
            evidence_file, evidence_line, url = parts[0], parts[1], parts[2]
            url = url.strip()
            if not url:
                continue
            results.append(
                {
                    "url": url,
                    "source": {
                        "type": source_type,
                        "evidence_file": evidence_file,
                        "evidence_line": int(evidence_line) if evidence_line.isdigit() else evidence_line,
                    },
                }
            )
    return results


source_items = read_sources(candidate_tsv, "host_scan") + read_sources(manual_tsv, "manual_input")
if not source_items:
    print("ERROR: no source items found", file=sys.stderr)
    sys.exit(1)

# Keep first evidence for each URL.
repo_sources = {}
for item in source_items:
    repo_sources.setdefault(item["url"], item["source"])


def fetch_zip(url: str) -> bytes:
    """Download url and return its body.

    Raises RuntimeError on an HTTP status >= 400 or when the body is larger
    than max_zip_bytes; urllib errors propagate unchanged.
    """
    request = urllib.request.Request(
        url,
        headers={
            "User-Agent": "zima-repo-discovery/1.0",
            "Accept": "application/zip,application/octet-stream,*/*",
        },
    )
    context = ssl._create_unverified_context() if insecure_tls else None
    with urllib.request.urlopen(request, timeout=http_timeout, context=context) as response:
        status = getattr(response, "status", 200)
        if status >= 400:
            raise RuntimeError(f"HTTP {status}")
        # Read one byte past the limit so an oversized body can be detected.
        payload = response.read(max_zip_bytes + 1)
        if len(payload) > max_zip_bytes:
            raise RuntimeError(f"ZIP larger than allowed limit ({max_zip_bytes} bytes)")
        return payload


def extract_title(compose_data):
    """Return the x-casaos title of a parsed compose file, or "" if absent.

    For a per-locale title mapping the preferred locales are tried first,
    then any other non-empty string value.
    """
    if not isinstance(compose_data, dict):
        return ""
    x_casaos = compose_data.get("x-casaos")
    if not isinstance(x_casaos, dict):
        return ""
    title = x_casaos.get("title")
    if isinstance(title, str):
        return title.strip()
    if isinstance(title, dict):
        for key in ("en_US", "en_us", "en", "sv_SE", "sv_se"):
            value = title.get(key)
            if isinstance(value, str) and value.strip():
                return value.strip()
        for value in title.values():
            if isinstance(value, str) and value.strip():
                return value.strip()
    return ""


def extract_images(compose_data):
    """Return the normalized, de-duplicated service images in first-seen order."""
    images = []
    if not isinstance(compose_data, dict):
        return images
    services = compose_data.get("services")
    if not isinstance(services, dict):
        return images
    for svc in services.values():
        if not isinstance(svc, dict):
            continue
        image = svc.get("image")
        if isinstance(image, str) and image.strip():
            images.append(image.strip())
    dedup = []
    seen = set()
    for image in images:
        norm = normalize_image_ref(image)
        if norm and norm not in seen:
            seen.add(norm)
            dedup.append(norm)
    return dedup


all_repo_entries = []
app_index = {}

for repo_url, source in sorted(repo_sources.items(), key=lambda t: t[0].lower()):
    entry = {
        "url": repo_url,
        "source": source,
        "status": "error",
        "error": "",
        "app_count": 0,
        "app_ids": [],
    }
    try:
        payload = fetch_zip(repo_url)
        with zipfile.ZipFile(io.BytesIO(payload)) as archive:
            # Map each app id to the first compose file found under Apps/<app_id>/.
            app_compose = {}
            for member in archive.namelist():
                member_lower = member.lower()
                if not (member_lower.endswith("docker-compose.yml") or member_lower.endswith("docker-compose.yaml")):
                    continue
                path = PurePosixPath(member)
                parts = path.parts
                if "Apps" not in parts:
                    continue
                idx = parts.index("Apps")
                if idx + 2 >= len(parts):
                    continue
                app_id = parts[idx + 1]
                if not app_id or app_id == "_template":
                    continue
                app_compose.setdefault(app_id, member)

            if not app_compose:
                raise RuntimeError("ZIP contains no Apps/*/docker-compose.yml|yaml")

            app_ids = sorted(app_compose.keys())
            entry["status"] = "ok"
            entry["error"] = ""
            entry["app_count"] = len(app_ids)
            entry["app_ids"] = app_ids

            for app_id, compose_member in sorted(app_compose.items(), key=lambda t: t[0].lower()):
                title = ""
                images = []
                try:
                    raw = archive.read(compose_member)
                    parsed = yaml.safe_load(raw.decode("utf-8", errors="replace"))
                    title = extract_title(parsed)
                    images = extract_images(parsed)
                except Exception:
                    # Keep inventory resilient; compose parse failures should not drop app id.
                    title = ""
                    images = []
                app_key = (repo_url, app_id)
                app_index[app_key] = {
                    "app_id": app_id,
                    "title": title,
                    "repo_url": repo_url,
                    "images": images,
                }
    except Exception as exc:
        entry["status"] = "error"
        entry["error"] = str(exc)
    all_repo_entries.append(entry)

apps = [app_index[key] for key in sorted(app_index.keys(), key=lambda t: (t[0].lower(), t[1].lower()))]

inventory = {
    "schema_version": 1,
    "generated_at": dt.datetime.now(dt.timezone.utc).isoformat(),
    "host": host,
    "repositories": all_repo_entries,
    "apps": apps,
}

with open(out_path, "w", encoding="utf-8") as handle:
    yaml.safe_dump(inventory, handle, sort_keys=False, allow_unicode=False)

ok_count = sum(1 for repo in all_repo_entries if repo["status"] == "ok")
err_count = len(all_repo_entries) - ok_count
print(f"Wrote {out_path}")
print(f"Repositories: total={len(all_repo_entries)} ok={ok_count} error={err_count}")
print(f"Apps discovered: {len(apps)}")

# Fail closed: the inventory is still written, but no valid repository is an error.
if ok_count == 0:
    sys.exit(1)
PY

echo "[3/3] Done. Inventory written to: $OUT_PATH"