Files
zima-apps/scripts/discover-zima-repos-and-apps.sh
T
Joachim Friberg 4b43e80f06 Updated metadata
Changed author and developer to Joachim Friberg
2026-03-20 13:15:56 +01:00

468 lines
13 KiB
Bash
Executable File

#!/usr/bin/env bash
# Strict mode: abort on command failure, on unset variables, and on a failure
# in any stage of a pipeline.
set -euo pipefail

# Default settings. Each one may be overridden by the matching command-line
# option handled further down in this script.
HOST=''
USER_NAME='root'
PORT='22'
IDENTITY=''
OUT_PATH='artifacts/zima-repo-app-inventory.yaml'
SSH_TIMEOUT='12'
HTTP_TIMEOUT='20'
MAX_ZIP_BYTES=$((512 * 1024 * 1024)) # 512 MiB
EXTRA_REPO_URLS=()
SKIP_HOST_SCAN=0
INSECURE_TLS=0
#######################################
# Print the command-line help text.
# Arguments: none
# Outputs:   help text on stdout; the first line embeds $0 (the name the
#            script was invoked with).
#######################################
usage() {
  local -a help_lines=(
    "Usage: $0 --host <host> [options]"
    'Options:'
    '--host <host> ZimaOS host/IP (required)'
    '--user <user> SSH user (default: root)'
    '--port <port> SSH port (default: 22)'
    '--identity <path> SSH private key path'
    '--out <path> Output YAML (default: artifacts/zima-repo-app-inventory.yaml)'
    '--ssh-timeout <seconds> SSH connect timeout (default: 12)'
    '--http-timeout <seconds> HTTP timeout for repo ZIP fetch (default: 20)'
    '--max-zip-bytes <bytes> Max ZIP size to download per repo (default: 536870912)'
    '--repo-url <url> Additional appstore ZIP URL (repeatable)'
    '--skip-host-scan Skip SSH host scan and only use --repo-url values'
    '--insecure-tls Disable TLS cert verification for ZIP downloads (use only if needed)'
    '-h, --help Show this help'
    'Behavior:'
    '1. SSH to host and scan likely CasaOS/ZimaOS config paths for appstore ZIP URLs.'
    '2. Validate each URL fail-closed (reachable ZIP + parseable app structure).'
    '3. Write repository and app inventory to YAML.'
  )
  # One element per output line, each terminated by a newline.
  printf '%s\n' "${help_lines[@]}"
}
#######################################
# Abort with a usage error when a value-taking option is the last argument.
# Without this check `shift 2` fails and, under `set -e`, the script dies
# silently with status 1 and no explanation.
# Arguments: $1 - option name, $2 - number of arguments left (option included)
# Outputs:   error message on stderr, help text via usage
# Returns:   0 if a value is present; otherwise exits the shell with status 2
#######################################
_require_option_value() {
  if [[ "$2" -lt 2 ]]; then
    echo "ERROR: $1 requires a value" >&2
    usage
    exit 2
  fi
}

#######################################
# Parse command-line options into the script's global settings.
# Globals:   HOST, USER_NAME, PORT, IDENTITY, OUT_PATH, SSH_TIMEOUT,
#            HTTP_TIMEOUT, MAX_ZIP_BYTES, EXTRA_REPO_URLS, SKIP_HOST_SCAN,
#            INSECURE_TLS (all written)
# Arguments: the script's command-line arguments
# Outputs:   help text on -h/--help; error text on stderr for bad input
# Returns:   0 on success; exits 0 after --help; exits 2 on an unknown
#            argument or an option that is missing its value
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --host)
        _require_option_value "$1" "$#"
        HOST="$2"
        shift 2
        ;;
      --user)
        _require_option_value "$1" "$#"
        USER_NAME="$2"
        shift 2
        ;;
      --port)
        _require_option_value "$1" "$#"
        PORT="$2"
        shift 2
        ;;
      --identity)
        _require_option_value "$1" "$#"
        IDENTITY="$2"
        shift 2
        ;;
      --out)
        _require_option_value "$1" "$#"
        OUT_PATH="$2"
        shift 2
        ;;
      --ssh-timeout)
        _require_option_value "$1" "$#"
        SSH_TIMEOUT="$2"
        shift 2
        ;;
      --http-timeout)
        _require_option_value "$1" "$#"
        HTTP_TIMEOUT="$2"
        shift 2
        ;;
      --max-zip-bytes)
        _require_option_value "$1" "$#"
        MAX_ZIP_BYTES="$2"
        shift 2
        ;;
      --repo-url)
        _require_option_value "$1" "$#"
        # Repeatable: every occurrence is appended.
        EXTRA_REPO_URLS+=("$2")
        shift 2
        ;;
      --skip-host-scan)
        SKIP_HOST_SCAN=1
        shift
        ;;
      --insecure-tls)
        INSECURE_TLS=1
        shift
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        echo "ERROR: Unknown argument: $1" >&2
        usage
        exit 2
        ;;
    esac
  done
}

parse_args "$@"
# The original top-level loop consumed every positional parameter; keep that
# post-condition so later code sees the same (empty) argument list.
set --
# Validate the parsed options. Every failure below exits with status 2.

# A target host is mandatory.
[[ -n "$HOST" ]] || {
  echo "ERROR: --host is required" >&2
  usage
  exit 2
}

# The port must be a non-empty string of digits.
case "$PORT" in
  '' | *[!0-9]*)
    echo "ERROR: --port must be numeric" >&2
    exit 2
    ;;
esac

# Both timeouts and the size limit must be non-empty strings of digits.
for numeric_setting in "$SSH_TIMEOUT" "$HTTP_TIMEOUT" "$MAX_ZIP_BYTES"; do
  if [[ ! "$numeric_setting" =~ ^[0-9]+$ ]]; then
    echo "ERROR: timeout/size values must be numeric" >&2
    exit 2
  fi
done

# When an identity file is given it has to exist as a regular file.
if [[ -n "$IDENTITY" ]] && [[ ! -f "$IDENTITY" ]]; then
  echo "ERROR: identity file not found: $IDENTITY" >&2
  exit 2
fi
# Scratch directory for the intermediate TSV files.
tmp_dir=$(mktemp -d)

# Remove the scratch directory; registered below to run on every exit path.
cleanup() {
  rm -rf "$tmp_dir"
}
trap cleanup EXIT

# Intermediate files: raw scan output, de-duplicated scan hits, and the
# URLs supplied with --repo-url.
raw_scan_tsv="${tmp_dir}/raw-scan.tsv"
candidate_tsv="${tmp_dir}/candidates.tsv"
manual_tsv="${tmp_dir}/manual.tsv"
#######################################
# Convert `grep -Hno` hits ("file:line:match") read from stdin into
# tab-separated "file<TAB>line<TAB>match" records on stdout.
# Only the first two colons are treated as separators, so the colons inside
# the matched URL ("https://", ":8443") stay intact. Splitting on every colon
# and re-joining the fields (the previous approach) rebuilt each record with
# spaces and turned the URL into "https //host/...".
# Lines without two colons are dropped. A file path that itself contains a
# colon cannot be told apart and will be split at its first colon.
#######################################
_scan_hits_to_tsv() {
  awk '{
    first = index($0, ":")
    if (first == 0) next
    rest = substr($0, first + 1)
    second = index(rest, ":")
    if (second == 0) next
    printf "%s\t%s\t%s\n", substr($0, 1, first - 1), substr(rest, 1, second - 1), substr(rest, second + 1)
  }'
}

#######################################
# Scan the host over SSH for appstore ZIP URLs in known config directories.
# Globals:   SSH_TIMEOUT, PORT, IDENTITY, USER_NAME, HOST (read)
# Arguments: none
# Outputs:   one "file<TAB>line<TAB>url" record per hit on stdout
# Returns:   0 on success (including "nothing found"); otherwise the ssh
#            exit status, or the status of the local conversion step
#######################################
run_remote_scan() {
  local -a ssh_cmd stage_rc
  ssh_cmd=(ssh -o BatchMode=yes -o ConnectTimeout="$SSH_TIMEOUT" -p "$PORT")
  if [[ -n "$IDENTITY" ]]; then
    ssh_cmd+=(-i "$IDENTITY")
  fi
  # The remote side runs the quoted heredoc below via "bash -s"; nothing in
  # it is expanded locally.
  ssh_cmd+=("${USER_NAME}@${HOST}" "bash -s")

  "${ssh_cmd[@]}" <<'REMOTE' | _scan_hits_to_tsv
set -euo pipefail
roots=(
  /etc/casaos
  /var/lib/casaos
  /usr/local/etc/casaos
  /etc/zimaos
  /var/lib/zimaos
  /DATA/AppData
  /DATA/.casaos
)
scan_roots=()
for path in "${roots[@]}"; do
  if [[ -d "$path" ]]; then
    scan_roots+=("$path")
  fi
done
if [[ "${#scan_roots[@]}" -eq 0 ]]; then
  exit 0
fi
grep_rc=0
grep -ERHIno --binary-files=without-match \
  --include='*.yaml' --include='*.yml' \
  --include='*.json' --include='*.conf' --include='*.ini' --include='*.txt' \
  'https?://[^"[:space:]]+\.zip(\?[^"[:space:]]*)?' "${scan_roots[@]}" 2>/dev/null \
  || grep_rc=$?
# grep exits 1 when nothing matched. That is an empty result, not a failed
# scan, so only statuses above 1 are reported as errors.
if [[ "$grep_rc" -gt 1 ]]; then
  exit "$grep_rc"
fi
REMOTE
  # Report failures explicitly instead of relying on the caller's pipefail.
  stage_rc=("${PIPESTATUS[@]}")
  if [[ "${stage_rc[0]}" -ne 0 ]]; then
    return "${stage_rc[0]}"
  fi
  return "${stage_rc[1]}"
}
# Step 1: fill $raw_scan_tsv from the host scan, or leave it empty when the
# scan is skipped or when it fails but --repo-url entries can stand in.
echo "[1/3] Scanning ZimaOS host for configured appstore repositories..."
if [[ "$SKIP_HOST_SCAN" -eq 1 ]]; then
  echo "INFO: --skip-host-scan enabled; skipping SSH scan." >&2
  : > "$raw_scan_tsv"
elif run_remote_scan > "$raw_scan_tsv"; then
  # Scan succeeded; its output is already in place.
  :
elif [[ "${#EXTRA_REPO_URLS[@]}" -gt 0 ]]; then
  echo "WARN: SSH host scan failed; continuing with --repo-url entries only." >&2
  # Discard any partial output of the failed scan.
  : > "$raw_scan_tsv"
else
  echo "ERROR: SSH host scan failed and no --repo-url fallback provided." >&2
  exit 1
fi
# Keep the first hit for every distinct URL (third column) found by the scan.
# $candidate_tsv is only created when the scan produced output.
if [[ -s "$raw_scan_tsv" ]]; then
  awk 'BEGIN { FS = "\t"; OFS = "\t" } NF >= 3 && !seen[$3]++ { print $1, $2, $3 }' \
    "$raw_scan_tsv" > "$candidate_tsv"
fi

# Record each non-empty --repo-url value in the same three-column layout,
# using "manual-input" and line 0 as the evidence columns.
: > "$manual_tsv"
if [[ "${#EXTRA_REPO_URLS[@]}" -gt 0 ]]; then
  for url in "${EXTRA_REPO_URLS[@]}"; do
    if [[ -n "$url" ]]; then
      printf '%s\t%s\t%s\n' "manual-input" "0" "$url"
    fi
  done >> "$manual_tsv"
fi
# Count the URLs gathered from each source; a missing or empty file counts
# as zero.
candidate_count=0
manual_count=0
if [[ -s "$candidate_tsv" ]]; then
  # The arithmetic expansion discards any padding wc puts around the number.
  candidate_count=$(($(wc -l < "$candidate_tsv")))
fi
if [[ -s "$manual_tsv" ]]; then
  manual_count=$(($(wc -l < "$manual_tsv")))
fi

# Nothing to validate: stop before the download step.
if ((candidate_count + manual_count == 0)); then
  echo "ERROR: No appstore ZIP URLs discovered on host and no --repo-url provided." >&2
  echo "Hint: pass one or more --repo-url <https://...zip> if host scan misses your setup." >&2
  exit 1
fi

if ((INSECURE_TLS == 1)); then
  echo "WARN: --insecure-tls enabled; certificate verification is disabled for ZIP downloads." >&2
fi
# Step 2: hand the collected URLs to the embedded Python program below.
# Settings are passed through the environment of that one process only.
printf '%s\n' "[2/3] Validating repositories and extracting app inventory..."
out_dir="$(dirname "$OUT_PATH")"
mkdir -p "$out_dir"
env \
  CANDIDATE_TSV="$candidate_tsv" \
  MANUAL_TSV="$manual_tsv" \
  OUT_PATH="$OUT_PATH" \
  HOST="$HOST" \
  HTTP_TIMEOUT="$HTTP_TIMEOUT" \
  MAX_ZIP_BYTES="$MAX_ZIP_BYTES" \
  INSECURE_TLS="$INSECURE_TLS" \
  python3 - <<'PY'
import datetime as dt
import io
import os
import re
import ssl
import sys
import urllib.request
import zipfile
from pathlib import PurePosixPath
import yaml
# Settings handed over by the shell wrapper through environment variables.
# A missing variable raises KeyError; the numeric ones must parse as int.
_env = os.environ
candidate_tsv = _env["CANDIDATE_TSV"]
manual_tsv = _env["MANUAL_TSV"]
out_path = _env["OUT_PATH"]
host = _env["HOST"]
http_timeout = int(_env["HTTP_TIMEOUT"])
max_zip_bytes = int(_env["MAX_ZIP_BYTES"])
insecure_tls = _env["INSECURE_TLS"] == "1"
def normalize_image_ref(ref: str) -> str:
    """Reduce a container image reference to a lower-case name.

    Surrounding whitespace and quote characters are removed, then anything
    from the first "@" on (a digest) and a ":tag" on the last path component.
    A colon in an earlier component (such as a registry port) is kept.
    Returns "" for empty or None input.
    """
    cleaned = (ref or "").strip().strip('"\'')
    if not cleaned:
        return ""
    # Everything from the first "@" onwards is dropped.
    cleaned = cleaned.split("@", 1)[0]
    # Only the component after the last "/" can carry a tag.
    head, slash, last = cleaned.rpartition("/")
    last = last.split(":", 1)[0]
    return f"{head}{slash}{last}".lower()
def read_sources(path: str, source_type: str):
    """Load URL records from a tab-separated file.

    Each usable line has at least three columns: evidence file, evidence
    line, URL. Lines with fewer columns or a blank URL are skipped, and a
    missing file yields an empty list.

    Returns a list of {"url": ..., "source": {...}} dicts where the source
    carries `source_type`, the evidence file, and the evidence line (as int
    when it consists only of digits, otherwise as the original string).
    """
    if not os.path.exists(path):
        return []
    records = []
    with open(path, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            columns = raw_line.rstrip("\n").split("\t")
            if len(columns) < 3:
                # Covers empty lines as well as truncated records.
                continue
            evidence_file, evidence_line = columns[0], columns[1]
            url = columns[2].strip()
            if not url:
                continue
            source = {
                "type": source_type,
                "evidence_file": evidence_file,
                "evidence_line": int(evidence_line) if evidence_line.isdigit() else evidence_line,
            }
            records.append({"url": url, "source": source})
    return records
# Host-scan hits come first so that they win over manual entries below.
source_items = [
    *read_sources(candidate_tsv, "host_scan"),
    *read_sources(manual_tsv, "manual_input"),
]
if not source_items:
    print("ERROR: no source items found", file=sys.stderr)
    sys.exit(1)

# Map each URL to the evidence of its first occurrence; later duplicates
# are ignored.
repo_sources = {}
for item in source_items:
    if item["url"] not in repo_sources:
        repo_sources[item["url"]] = item["source"]
def fetch_zip(url: str) -> bytes:
    """Download `url` and return the response body as bytes.

    Uses the module-level `http_timeout`, `max_zip_bytes` and `insecure_tls`
    settings. At most `max_zip_bytes + 1` bytes are read so an oversized body
    can be detected without reading all of it.

    Raises RuntimeError for a response status of 400 or above and for a body
    larger than `max_zip_bytes`; errors raised by urllib propagate unchanged.
    """
    headers = {
        "User-Agent": "zima-repo-discovery/1.0",
        "Accept": "application/zip,application/octet-stream,*/*",
    }
    request = urllib.request.Request(url, headers=headers)
    if insecure_tls:
        # Certificate verification is switched off on explicit request only.
        context = ssl._create_unverified_context()
    else:
        context = None
    with urllib.request.urlopen(request, timeout=http_timeout, context=context) as response:
        status = getattr(response, "status", 200)
        if status >= 400:
            raise RuntimeError(f"HTTP {status}")
        payload = response.read(max_zip_bytes + 1)
    if len(payload) > max_zip_bytes:
        raise RuntimeError(f"ZIP larger than allowed limit ({max_zip_bytes} bytes)")
    return payload
def extract_title(compose_data):
    """Return the app title stored under "x-casaos" -> "title".

    A string title is returned stripped. For a mapping of titles, the first
    non-blank string among the keys en_US, en_us, en, sv_SE, sv_se is used,
    then the first non-blank string among all values. Anything else gives "".
    """
    x_casaos = compose_data.get("x-casaos") if isinstance(compose_data, dict) else None
    title = x_casaos.get("title") if isinstance(x_casaos, dict) else None
    if isinstance(title, str):
        return title.strip()
    if not isinstance(title, dict):
        return ""
    preferred_keys = ("en_US", "en_us", "en", "sv_SE", "sv_se")
    # Preferred locales first, then every value in mapping order.
    candidates = [title.get(key) for key in preferred_keys]
    candidates.extend(title.values())
    for candidate in candidates:
        if isinstance(candidate, str) and candidate.strip():
            return candidate.strip()
    return ""
def extract_images(compose_data):
    """Return the distinct normalized image names used by compose services.

    Reads the "image" string of every mapping under "services", normalizes it
    with normalize_image_ref, and keeps the first occurrence of each
    non-empty result in encounter order. Malformed input gives [].
    """
    services = compose_data.get("services") if isinstance(compose_data, dict) else None
    if not isinstance(services, dict):
        return []
    # A dict preserves insertion order, which gives an ordered de-duplication.
    unique = {}
    for svc in services.values():
        if not isinstance(svc, dict):
            continue
        image = svc.get("image")
        if not (isinstance(image, str) and image.strip()):
            continue
        norm = normalize_image_ref(image.strip())
        if norm:
            unique.setdefault(norm, None)
    return list(unique)
# Exact file names that count as an app's compose file. Matching on the whole
# base name (instead of a string suffix) keeps files such as
# "old-docker-compose.yml" from being mistaken for the compose file.
COMPOSE_FILENAMES = frozenset({"docker-compose.yml", "docker-compose.yaml"})

# Validate every repository URL in case-insensitive URL order. A repository is
# "ok" only if its ZIP downloads, opens, and holds at least one
# Apps/<app_id>/.../docker-compose.yml|yaml member; any exception marks it as
# "error" with the exception text, and processing continues with the next URL.
all_repo_entries = []
app_index = {}
for repo_url, source in sorted(repo_sources.items(), key=lambda t: t[0].lower()):
    entry = {
        "url": repo_url,
        "source": source,
        "status": "error",
        "error": "",
        "app_count": 0,
        "app_ids": [],
    }
    try:
        payload = fetch_zip(repo_url)
        with zipfile.ZipFile(io.BytesIO(payload)) as archive:
            # app_id -> first compose member seen for that app.
            app_compose = {}
            for member in archive.namelist():
                if member.endswith("/"):
                    # Directory entry, never a compose file.
                    continue
                path = PurePosixPath(member)
                if path.name.lower() not in COMPOSE_FILENAMES:
                    continue
                parts = path.parts
                if "Apps" not in parts:
                    continue
                idx = parts.index("Apps")
                # Need at least Apps/<app_id>/<compose file>.
                if idx + 2 >= len(parts):
                    continue
                app_id = parts[idx + 1]
                if not app_id or app_id == "_template":
                    continue
                app_compose.setdefault(app_id, member)
            if not app_compose:
                raise RuntimeError("ZIP contains no Apps/*/docker-compose.yml|yaml")
            app_ids = sorted(app_compose.keys())
            entry["status"] = "ok"
            entry["error"] = ""
            entry["app_count"] = len(app_ids)
            entry["app_ids"] = app_ids
            for app_id, compose_member in sorted(app_compose.items(), key=lambda t: t[0].lower()):
                title = ""
                images = []
                try:
                    raw = archive.read(compose_member)
                    parsed = yaml.safe_load(raw.decode("utf-8", errors="replace"))
                    title = extract_title(parsed)
                    images = extract_images(parsed)
                except Exception:
                    # Keep inventory resilient; compose parse failures should not drop app id.
                    title = ""
                    images = []
                app_key = (repo_url, app_id)
                app_index[app_key] = {
                    "app_id": app_id,
                    "title": title,
                    "repo_url": repo_url,
                    "images": images,
                }
    except Exception as exc:
        entry["status"] = "error"
        entry["error"] = str(exc)
    all_repo_entries.append(entry)
def _app_sort_key(key):
    """Order (repo_url, app_id) keys case-insensitively."""
    repo_url, app_id = key
    return (repo_url.lower(), app_id.lower())


# Flatten the per-repository app index into one ordered list.
apps = [app_index[key] for key in sorted(app_index, key=_app_sort_key)]

inventory = {
    "schema_version": 1,
    "generated_at": dt.datetime.now(dt.timezone.utc).isoformat(),
    "host": host,
    "repositories": all_repo_entries,
    "apps": apps,
}
with open(out_path, "w", encoding="utf-8") as handle:
    # Keys stay in the order defined above.
    yaml.safe_dump(inventory, handle, sort_keys=False, allow_unicode=False)

total_count = len(all_repo_entries)
ok_count = len([repo for repo in all_repo_entries if repo["status"] == "ok"])
err_count = total_count - ok_count
print(f"Wrote {out_path}")
print(f"Repositories: total={total_count} ok={ok_count} error={err_count}")
print(f"Apps discovered: {len(apps)}")

# Fail the run when no repository validated. The inventory file has already
# been written at this point.
if ok_count == 0:
    sys.exit(1)
PY
# Step 3: report where the inventory was written.
printf '%s\n' "[3/3] Done. Inventory written to: $OUT_PATH"