Add llama-server and open-webui apps for local LLM inference
- llama-server: llama.cpp REST API server, 8G memory, port 8080
- open-webui: Chat UI connecting to llama-server, 2G memory, port 3000
- Both include x-casaos metadata for ZimaOS app store
- README with model download instructions and API examples
This commit is contained in:
@@ -0,0 +1,82 @@
# Compose app definition for a llama.cpp REST API server, with x-casaos
# metadata for the CasaOS/ZimaOS app store.
name: llama-server

services:
  llama-server:
    image: ghcr.io/ggerganov/llama.cpp:server
    container_name: llama-server
    restart: unless-stopped
    environment:
      TZ: Europe/Stockholm
      # llama-server takes its settings from LLAMA_ARG_* variables. Unprefixed
      # names (MODEL, CTX_SIZE, ...) are not read by the image, and no
      # `command:` is set here to forward them, so the prefixed names are used.
      # The model is given as a full path under the /models bind mount below.
      LLAMA_ARG_MODEL: /models/llama-3.2-3b-q4_k_m.gguf
      LLAMA_ARG_CTX_SIZE: "2048"
      # -1 is llama.cpp's documented default for --threads.
      LLAMA_ARG_THREADS: "-1"
      # Listen on all interfaces so the published port is reachable.
      LLAMA_ARG_HOST: "0.0.0.0"
      # Must match the `target` of the port mapping below.
      LLAMA_ARG_PORT: "8080"
      LLAMA_ARG_N_PREDICT: "512"
    ports:
      - target: 8080
        published: "8080"
        protocol: tcp
    volumes:
      # $AppID is left for the app store to substitute.
      - type: bind
        source: /DATA/AppData/$AppID/models
        target: /models
      - type: bind
        source: /DATA/AppData/$AppID/logs
        target: /logs
    deploy:
      resources:
        reservations:
          memory: 8G
    # Hardening: no privilege escalation and no Linux capabilities.
    security_opt:
      - no-new-privileges:true
    cap_drop:
      - ALL
    # Per-service descriptions of the settings above.
    x-casaos:
      envs:
        - container: LLAMA_ARG_MODEL
          description:
            en_us: >-
              Path to the GGUF model inside the container
              (e.g. /models/llama-3.2-3b-q4_k_m.gguf).
              Download GGUF files manually into /models.
        - container: LLAMA_ARG_CTX_SIZE
          description:
            en_us: Context window size in tokens
        - container: LLAMA_ARG_THREADS
          description:
            en_us: CPU threads (-1 = llama.cpp default, auto-detect)
        - container: LLAMA_ARG_N_PREDICT
          description:
            en_us: Maximum tokens to generate per response
        - container: TZ
          description:
            en_us: Timezone, for example Europe/Stockholm
      ports:
        - container: "8080"
          description:
            en_us: llama.cpp REST API port
      volumes:
        - container: /models
          description:
            en_us: Model GGUF files directory
        - container: /logs
          description:
            en_us: Server log output

# App-level store metadata.
x-casaos:
  architectures:
    - amd64
    - arm64
  main: llama-server
  category: ai
  author: Joachim Friberg
  developer: Joachim Friberg
  icon: https://cdn.simpleicons.org/llama
  tagline:
    en_us: CPU-only LLM inference server with REST API
  description:
    en_us: >
      Local LLM inference server using llama.cpp. Serves GGUF models via OpenAI-compatible REST API.
      CPU-only with AVX2/AVX512 optimization. Requires manual model download.
  title:
    en_us: Llama Server
  index: /
  port_map: "8080"
Reference in New Issue
Block a user