Compare commits

...

7 Commits

Author SHA1 Message Date
5a82554884 feat(opencode): add oh-my-openagent plugin with omo config for ollama-cloud/glm-5.1
Some checks failed
CI / check (push) Failing after 2m8s
CI / build-and-cache (push) Has been skipped
Configure oh-my-openagent (omo) plugin for multi-agent orchestration
using ollama-cloud and local llama-swap providers. Primary model is
ollama-cloud/glm-5.1 with fallback chains. Add runtime fallback,
background task concurrency limits, and disable incompatible agents
(hephaestus, multimodal-looker).
2026-04-17 13:43:08 -07:00
170a27310e feat(local-inference): add TTL support for automatic model unloading
Some checks failed
CI / check (push) Failing after 1m44s
CI / build-and-cache (push) Has been skipped
Add globalTTL and per-model ttl options to llama-swap config,
allowing idle models to be automatically unloaded from memory.
2026-04-16 15:37:02 -07:00
bd377676ed fix(opencode): increase context/output limits for local model
Some checks failed
CI / check (push) Failing after 1m59s
CI / build-and-cache (push) Has been skipped
2026-04-16 15:20:51 -07:00
10efafd92e feat(local-inference): replace ollama with llama-swap + llama.cpp on zix790prors
- Add local-inference NixOS role using llama-swap (from nixpkgs-unstable)
  with llama.cpp (CUDA-enabled, from nixpkgs-unstable)
- Serves Qwen3.6-35B-A3B via HuggingFace auto-download with --cpu-moe
- Add nixosSpecialArgs for nixpkgs-unstable module access
- Configure opencode with llama-local provider pointing to zix790prors:8080
- Update gptel from Ollama backend to OpenAI-compatible llama-swap backend
- Remove ollama service from zix790prors
2026-04-16 15:20:37 -07:00
d16c8aa67e Merge pull request 'feat(app-launcher): workout card launcher + URL args' (#54) from ash/workout-card-launcher into main
All checks were successful
CI / check (push) Successful in 1m40s
CI / build-and-cache (push) Successful in 3h10m42s
Reviewed-on: #54
2026-04-13 17:13:41 -07:00
Ash
f3b405959e feat(app-launcher): add /workout endpoint and URL arg support
All checks were successful
CI / check (pull_request) Successful in 1m46s
CI / build-and-cache (pull_request) Has been skipped
- POST /workout opens today's workout card in Firefox (auto-generates URL from date)
- POST /launch/firefox now accepts optional JSON body: {"args": ["https://..."]}
- When args are provided, Firefox launches a new instance even if already running
- Updated GET / endpoint with endpoint documentation
2026-04-13 13:58:40 -07:00
e9fd4cf0d6 feat(gym-box): update for new uuids and add swap
Some checks failed
CI / check (push) Successful in 1m24s
CI / build-and-cache (push) Failing after 6h14m31s
2026-04-10 17:59:06 -07:00
10 changed files with 443 additions and 42 deletions

View File

@@ -104,6 +104,11 @@
}; };
}; };
# Common specialArgs passed to all NixOS systems
nixosSpecialArgs = {
inherit nixpkgs-unstable;
};
# Shared unstable overlays for custom package builds # Shared unstable overlays for custom package builds
customUnstableOverlays = [ customUnstableOverlays = [
# Override claude-code in unstable to use our custom GCS-based build # Override claude-code in unstable to use our custom GCS-based build
@@ -149,6 +154,7 @@
in in
{ {
nixosConfigurations.nix-book = nixpkgs.lib.nixosSystem rec { nixosConfigurations.nix-book = nixpkgs.lib.nixosSystem rec {
specialArgs = nixosSpecialArgs;
system = "x86_64-linux"; system = "x86_64-linux";
modules = nixosModules ++ [ modules = nixosModules ++ [
./machines/nix-book/configuration.nix ./machines/nix-book/configuration.nix
@@ -166,6 +172,7 @@
}; };
nixosConfigurations.boxy = nixpkgs.lib.nixosSystem rec { nixosConfigurations.boxy = nixpkgs.lib.nixosSystem rec {
specialArgs = nixosSpecialArgs;
system = "x86_64-linux"; system = "x86_64-linux";
modules = nixosModules ++ [ modules = nixosModules ++ [
./machines/boxy/configuration.nix ./machines/boxy/configuration.nix
@@ -179,6 +186,7 @@
}; };
nixosConfigurations.gym-box = nixpkgs.lib.nixosSystem rec { nixosConfigurations.gym-box = nixpkgs.lib.nixosSystem rec {
specialArgs = nixosSpecialArgs;
system = "x86_64-linux"; system = "x86_64-linux";
modules = nixosModules ++ [ modules = nixosModules ++ [
./machines/gym-box/configuration.nix ./machines/gym-box/configuration.nix
@@ -191,6 +199,7 @@
}; };
nixosConfigurations.zix790prors = nixpkgs.lib.nixosSystem rec { nixosConfigurations.zix790prors = nixpkgs.lib.nixosSystem rec {
specialArgs = nixosSpecialArgs;
system = "x86_64-linux"; system = "x86_64-linux";
modules = nixosModules ++ [ modules = nixosModules ++ [
./machines/zix790prors/configuration.nix ./machines/zix790prors/configuration.nix
@@ -212,6 +221,7 @@
# Live USB ISO configuration # Live USB ISO configuration
nixosConfigurations.live-usb = nixpkgs.lib.nixosSystem rec { nixosConfigurations.live-usb = nixpkgs.lib.nixosSystem rec {
specialArgs = nixosSpecialArgs;
system = "x86_64-linux"; system = "x86_64-linux";
modules = nixosModules ++ [ modules = nixosModules ++ [
./machines/live-usb/configuration.nix ./machines/live-usb/configuration.nix
@@ -236,6 +246,7 @@
# ZFS/NFS server configuration # ZFS/NFS server configuration
nixosConfigurations.john-endesktop = nixpkgs.lib.nixosSystem rec { nixosConfigurations.john-endesktop = nixpkgs.lib.nixosSystem rec {
specialArgs = nixosSpecialArgs;
system = "x86_64-linux"; system = "x86_64-linux";
modules = nixosModules ++ [ modules = nixosModules ++ [
./machines/john-endesktop/configuration.nix ./machines/john-endesktop/configuration.nix
@@ -283,6 +294,7 @@
"custom-tea-rbw" = pkgs.custom.tea-rbw; "custom-tea-rbw" = pkgs.custom.tea-rbw;
"custom-rclone-torbox-setup" = pkgs.custom.rclone-torbox-setup; "custom-rclone-torbox-setup" = pkgs.custom.rclone-torbox-setup;
"custom-opencode" = pkgs.custom.opencode; "custom-opencode" = pkgs.custom.opencode;
"qt-pinned-jellyfin-media-player" = pkgsQt.jellyfin-media-player; "qt-pinned-jellyfin-media-player" = pkgsQt.jellyfin-media-player;
"qt-pinned-stremio" = pkgsQt.stremio; "qt-pinned-stremio" = pkgsQt.stremio;
} }

View File

@@ -99,6 +99,14 @@ in
}; };
}; };
xdg.configFile."opencode/opencode.json" = {
source = ./opencode-config.json;
};
xdg.configFile."opencode/oh-my-openagent.jsonc" = {
source = ./opencode-omo-config.jsonc;
};
# Note: modules must be imported at top-level home config # Note: modules must be imported at top-level home config
}; };
} }

View File

@@ -0,0 +1,24 @@
{
"$schema": "https://opencode.ai/config.json",
"plugin": ["oh-my-openagent"],
"provider": {
"llama-local": {
"name": "Llama.cpp (zix790prors RTX 4070 Ti)",
"npm": "@ai-sdk/openai-compatible",
"options": {
"baseURL": "http://zix790prors.oglehome:8080/v1"
},
"models": {
"Qwen3.6-35B-A3B": {
"name": "Qwen3.6-35B-A3B (UD-Q8_K_XL)",
"reasoning": true,
"tool_call": true,
"limit": {
"context": 131072,
"output": 32768
}
}
}
}
}
}

View File

@@ -0,0 +1,136 @@
// oh-my-openagent (omo) plugin configuration.
// Routes each agent and task category to a primary model plus an ordered
// fallback chain, spanning two providers: ollama-cloud (remote) and
// llama-local (llama-swap instance on zix790prors).
{
  "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-opencode.schema.json",

  // Per-agent model routing. "fallback_models" are tried in listed order
  // when the primary model fails (see "runtime_fallback" below).
  "agents": {
    "sisyphus": {
      "model": "ollama-cloud/glm-5.1",
      "fallback_models": [
        "ollama-cloud/kimi-k2.5",
        "llama-local/Qwen3.6-35B-A3B",
        "ollama-cloud/qwen3-coder-next"
      ]
    },
    "prometheus": {
      "model": "ollama-cloud/glm-5.1",
      "fallback_models": [
        "ollama-cloud/kimi-k2.5",
        "ollama-cloud/qwen3-coder-next"
      ]
    },
    "atlas": {
      "model": "ollama-cloud/glm-5.1",
      "fallback_models": [
        "ollama-cloud/gemma4:31b",
        "ollama-cloud/kimi-k2.5"
      ]
    },
    "explore": {
      "model": "ollama-cloud/gemma4:31b",
      "fallback_models": [
        "ollama-cloud/ministral-3:14b",
        "llama-local/Qwen3.6-35B-A3B"
      ]
    },
    "librarian": {
      "model": "ollama-cloud/gemma4:31b",
      "fallback_models": [
        "ollama-cloud/ministral-3:14b"
      ]
    },
    "oracle": {
      "model": "ollama-cloud/qwen3-coder-next",
      "fallback_models": [
        "ollama-cloud/deepseek-v3.2",
        "ollama-cloud/glm-5.1"
      ]
    },
    // Disabled: incompatible with this provider setup.
    "multimodal-looker": {
      "disable": true
    },
    "hephaestus": {
      "disable": true
    },
    "momus": {
      "model": "ollama-cloud/glm-5.1",
      "fallback_models": [
        "ollama-cloud/qwen3-coder-next"
      ]
    },
    "metis": {
      "model": "ollama-cloud/glm-5.1",
      "fallback_models": [
        "ollama-cloud/kimi-k2.5"
      ]
    }
  },

  // Category-level routing for tasks not pinned to a specific agent.
  "categories": {
    "quick": {
      "model": "ollama-cloud/gemma4:31b",
      "fallback_models": [
        "ollama-cloud/ministral-3:14b"
      ]
    },
    "unspecified-low": {
      "model": "ollama-cloud/glm-5.1",
      "fallback_models": [
        "ollama-cloud/kimi-k2.5",
        "llama-local/Qwen3.6-35B-A3B"
      ]
    },
    "unspecified-high": {
      "model": "ollama-cloud/glm-5.1",
      "fallback_models": [
        "ollama-cloud/kimi-k2.5",
        "ollama-cloud/qwen3-coder-next"
      ]
    },
    "deep": {
      "model": "ollama-cloud/qwen3-coder-next",
      "fallback_models": [
        "ollama-cloud/deepseek-v3.2",
        "ollama-cloud/glm-5.1"
      ]
    },
    "ultrabrain": {
      "model": "ollama-cloud/qwen3-coder-next",
      "fallback_models": [
        "ollama-cloud/deepseek-v3.2",
        "ollama-cloud/glm-5.1"
      ]
    },
    "writing": {
      "model": "ollama-cloud/glm-5.1",
      "fallback_models": [
        "ollama-cloud/kimi-k2.5"
      ]
    },
    "visual-engineering": {
      "model": "ollama-cloud/glm-5.1",
      "fallback_models": [
        "ollama-cloud/qwen3-coder-next"
      ]
    }
  },

  // Automatic failover: on the listed HTTP status codes, retry with the
  // next fallback model (up to max_fallback_attempts), wait
  // cooldown_seconds before reusing a failed model, and notify on switch.
  "runtime_fallback": {
    "enabled": true,
    "retry_on_errors": [400, 429, 503, 529],
    "max_fallback_attempts": 3,
    "cooldown_seconds": 60,
    "notify_on_fallback": true
  },

  // Concurrency caps for background tasks; the local single-GPU box is
  // limited to 2 parallel jobs while ollama-cloud allows 10.
  "background_task": {
    "defaultConcurrency": 5,
    "providerConcurrency": {
      "ollama-cloud": 10,
      "llama-local": 2
    }
  },

  "disabled_hooks": ["no-sisyphus-gpt"],

  // Custom review prompt for the comment-quality checker hook.
  "comment_checker": {
    "custom_prompt": "Check for AI-generated filler phrases, redundant obvious statements, and excessively verbose explanations. Comments should add value beyond what the code itself expresses. Flag: 'TODO' without ticket references, 'Note that...' when obvious, repeating the function name in the comment, and any form of 'simply' or 'simply just'. Use {{comments}} placeholder."
  },

  "tmux": { "enabled": false },

  // Experimental upstream features — presumably subject to change; verify
  // against the omo schema when upgrading the plugin.
  "experimental": {
    "aggressive_truncation": true,
    "task_system": true
  }
}

View File

@@ -233,14 +233,15 @@ rbw is unavailable or the entry is not found."
gptel-use-tools t gptel-use-tools t
gptel-confirm-tool-calls 'always gptel-confirm-tool-calls 'always
gptel-include-reasoning 'ignore gptel-include-reasoning 'ignore
gptel-model "qwen3:30b") gptel-model "Qwen3.6-35B-A3B")
;; Set default backend to be Ollama-Local ;; Set default backend to llama-swap (OpenAI-compatible)
(setq! gptel-backend (setq! gptel-backend
(gptel-make-ollama "Ollama-Local" (gptel-make-openai "llama-swap"
:host "localhost:11434" :host "localhost:8080"
:endpoint "/v1/chat/completions"
:stream t :stream t
:models '(deepseek-r1 deepseek-r1-fullctx qwen3:30b qwen3:4b llama3.1 qwen2.5-coder mistral-nemo gpt-oss))) :models '("Qwen3.6-35B-A3B")))
;; Define custom tools ;; Define custom tools
(gptel-make-tool (gptel-make-tool

View File

@@ -1,30 +1,46 @@
# Do not modify this file! It was generated by nixos-generate-config # Do not modify this file! It was generated by nixos-generate-config
# and may be overwritten by future invocations. Please make changes # and may be overwritten by future invocations. Please make changes
# to /etc/nixos/configuration.nix instead. # to /etc/nixos/configuration.nix instead.
{ config, lib, pkgs, modulesPath, ... }: {
config,
lib,
pkgs,
modulesPath,
...
}:
{ {
imports = imports = [
[ (modulesPath + "/installer/scan/not-detected.nix") (modulesPath + "/installer/scan/not-detected.nix")
]; ];
boot.initrd.availableKernelModules = [ "nvme" "xhci_pci" "thunderbolt" "uas" "usbhid" "usb_storage" "sd_mod" ]; boot.initrd.availableKernelModules = [
"nvme"
"xhci_pci"
"thunderbolt"
"usbhid"
"uas"
"sd_mod"
];
boot.initrd.kernelModules = [ ]; boot.initrd.kernelModules = [ ];
boot.kernelModules = [ "kvm-amd" ]; boot.kernelModules = [ "kvm-amd" ];
boot.extraModulePackages = [ ]; boot.extraModulePackages = [ ];
fileSystems."/" = fileSystems."/" = {
{ device = "/dev/disk/by-uuid/59c0df78-c6fa-415d-8592-13547a3fada6"; device = "/dev/disk/by-uuid/0e75a66e-6c9e-471e-8bd2-fee7b27b74a1";
fsType = "btrfs"; fsType = "btrfs";
}; };
fileSystems."/boot" = fileSystems."/boot" = {
{ device = "/dev/disk/by-uuid/DC66-D04C"; device = "/dev/disk/by-uuid/9E2C-F187";
fsType = "vfat"; fsType = "vfat";
options = [ "fmask=0022" "dmask=0022" ]; options = [
"fmask=0022"
"dmask=0022"
];
}; };
swapDevices = [ ]; swapDevices = [ { device = "/.swapfile"; } ];
nixpkgs.hostPlatform = lib.mkDefault "x86_64-linux"; nixpkgs.hostPlatform = lib.mkDefault "x86_64-linux";
hardware.cpu.amd.updateMicrocode = lib.mkDefault config.hardware.enableRedistributableFirmware; hardware.cpu.amd.updateMicrocode = lib.mkDefault config.hardware.enableRedistributableFirmware;

View File

@@ -26,6 +26,19 @@ with lib;
x11 = true; x11 = true;
}; };
kodi.enable = true; kodi.enable = true;
local-inference = {
enable = true;
host = "zix790prors.oglehome";
openFirewall = true;
globalTTL = 900;
models = {
"Qwen3.6-35B-A3B" = {
hf-model = "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q8_K_XL";
aliases = [ "Qwen3.6-35B-A3B" ];
cpu-moe = true;
};
};
};
nfs-mounts.enable = true; nfs-mounts.enable = true;
nvidia = { nvidia = {
enable = true; enable = true;
@@ -56,12 +69,6 @@ with lib;
${pkgs.xorg.xrandr}/bin/xrandr --output DP-0 --mode 3440x1440 --rate 164.90 --primary ${pkgs.xorg.xrandr}/bin/xrandr --output DP-0 --mode 3440x1440 --rate 164.90 --primary
''; '';
services.ollama = {
enable = true;
acceleration = "cuda";
loadModels = [ "gpt-oss" "deepseek-r1" "qwen3:30b" ];
};
# This option defines the first version of NixOS you have installed on this particular machine, # This option defines the first version of NixOS you have installed on this particular machine,
# and is used to maintain compatibility with application data (e.g. databases) created on older NixOS versions. # and is used to maintain compatibility with application data (e.g. databases) created on older NixOS versions.
# #

View File

@@ -5,6 +5,7 @@ import logging
import os import os
import subprocess import subprocess
import sys import sys
from datetime import date
from http.server import BaseHTTPRequestHandler, HTTPServer from http.server import BaseHTTPRequestHandler, HTTPServer
from urllib.parse import urlparse from urllib.parse import urlparse
import psutil import psutil
@@ -22,6 +23,9 @@ ALLOWED_APPS = {
'kodi': 'kodi' 'kodi': 'kodi'
} }
# Workout card base URL
WORKOUT_CARD_BASE_URL = 'https://ogle.fyi/ash/workout'
def is_app_running(app_name): def is_app_running(app_name):
"""Check if an application is already running, returns (is_running, pid)""" """Check if an application is already running, returns (is_running, pid)"""
command = ALLOWED_APPS.get(app_name) command = ALLOWED_APPS.get(app_name)
@@ -88,7 +92,10 @@ class AppLauncherHandler(BaseHTTPRequestHandler):
response = { response = {
'status': 'running', 'status': 'running',
'available_apps': list(ALLOWED_APPS.keys()), 'available_apps': list(ALLOWED_APPS.keys()),
'usage': 'POST /launch/<app_name> to launch an application' 'endpoints': {
'POST /launch/<app_name>': 'Launch an application (optional JSON body: {"args": ["url"]})',
'POST /workout': 'Open today\'s workout card in Firefox'
}
} }
self.wfile.write(json.dumps(response, indent=2).encode()) self.wfile.write(json.dumps(response, indent=2).encode())
else: else:
@@ -101,8 +108,21 @@ class AppLauncherHandler(BaseHTTPRequestHandler):
if len(path_parts) == 2 and path_parts[0] == 'launch': if len(path_parts) == 2 and path_parts[0] == 'launch':
app_name = path_parts[1] app_name = path_parts[1]
self.launch_app(app_name) self.launch_app(app_name)
elif len(path_parts) == 1 and path_parts[0] == 'workout':
self.open_workout_card()
else: else:
self.send_error(404, "Invalid endpoint. Use /launch/<app_name>") self.send_error(404, "Invalid endpoint. Use /launch/<app_name> or /workout")
def read_post_body(self):
    """Read and parse the JSON body of a POST request.

    Always returns a dict: an empty dict when there is no body, the body
    is not valid JSON/UTF-8, or the JSON value is not an object. This
    guarantees callers can safely use dict methods such as
    body.get('args', []) without type-checking the result.
    """
    content_length = int(self.headers.get('Content-Length', 0))
    if content_length <= 0:
        return {}
    try:
        body = self.rfile.read(content_length)
        parsed = json.loads(body.decode('utf-8'))
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        logger.warning(f"Failed to parse POST body as JSON: {e}")
        return {}
    # Valid JSON need not be an object (it can be a list, string, number,
    # etc.); returning such a value would make the caller's body.get(...)
    # raise AttributeError. Normalize to an empty dict instead.
    if not isinstance(parsed, dict):
        logger.warning("POST body JSON is not an object; ignoring")
        return {}
    return parsed
def launch_app(self, app_name): def launch_app(self, app_name):
if app_name not in ALLOWED_APPS: if app_name not in ALLOWED_APPS:
@@ -111,9 +131,23 @@ class AppLauncherHandler(BaseHTTPRequestHandler):
command = ALLOWED_APPS[app_name] command = ALLOWED_APPS[app_name]
# Read optional args from POST body
body = self.read_post_body()
extra_args = body.get('args', [])
# Validate args are strings
if not isinstance(extra_args, list) or not all(isinstance(a, str) for a in extra_args):
self.send_error(400, "'args' must be a list of strings")
return
full_command = [command] + extra_args
# Check if app is already running # Check if app is already running
is_running, existing_pid = is_app_running(app_name) is_running, existing_pid = is_app_running(app_name)
if is_running: if is_running:
# If extra args provided, still launch a new instance (e.g., open a URL)
if extra_args:
logger.info(f"Application {app_name} already running (PID: {existing_pid}), but extra args provided — launching new instance")
else:
logger.info(f"Application {app_name} is already running (PID: {existing_pid}), skipping launch") logger.info(f"Application {app_name} is already running (PID: {existing_pid}), skipping launch")
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'application/json') self.send_header('Content-type', 'application/json')
@@ -132,9 +166,9 @@ class AppLauncherHandler(BaseHTTPRequestHandler):
# Ensure we have the proper environment for GUI apps # Ensure we have the proper environment for GUI apps
env = os.environ.copy() env = os.environ.copy()
logger.info(f"Launching application: {command}") logger.info(f"Launching application: {' '.join(full_command)}")
process = subprocess.Popen( process = subprocess.Popen(
[command], full_command,
env=env, env=env,
stdout=subprocess.DEVNULL, stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
@@ -159,12 +193,50 @@ class AppLauncherHandler(BaseHTTPRequestHandler):
logger.error(f"Error launching {command}: {e}") logger.error(f"Error launching {command}: {e}")
self.send_error(500, f"Failed to launch {app_name}: {str(e)}") self.send_error(500, f"Failed to launch {app_name}: {str(e)}")
def open_workout_card(self):
    """Launch Firefox pointed at the workout card page for today's date."""
    day_stamp = date.today().strftime('%Y-%m-%d')
    card_url = f"{WORKOUT_CARD_BASE_URL}/{day_stamp}.html"
    logger.info(f"Opening workout card for {day_stamp}: {card_url}")
    # Unlike /launch/<app>, this endpoint always spawns a new Firefox
    # instance, even when one is already running.
    firefox_cmd = ALLOWED_APPS['firefox']
    try:
        child = subprocess.Popen(
            [firefox_cmd, card_url],
            env=os.environ.copy(),
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            start_new_session=True
        )
        self.send_response(200)
        self.send_header('Content-type', 'application/json')
        self.end_headers()
        payload = {
            'status': 'success',
            'message': 'Opened workout card in Firefox',
            'url': card_url,
            'pid': child.pid
        }
        self.wfile.write(json.dumps(payload).encode())
    except FileNotFoundError:
        logger.error(f"Firefox not found: {firefox_cmd}")
        self.send_error(500, "Firefox not found on system")
    except Exception as e:
        logger.error(f"Error launching Firefox with workout card: {e}")
        self.send_error(500, f"Failed to open workout card: {str(e)}")
def main(): def main():
port = int(sys.argv[1]) if len(sys.argv) > 1 else 8081 port = int(sys.argv[1]) if len(sys.argv) > 1 else 8081
server = HTTPServer(('0.0.0.0', port), AppLauncherHandler) server = HTTPServer(('0.0.0.0', port), AppLauncherHandler)
logger.info(f"App launcher server starting on port {port}") logger.info(f"App launcher server starting on port {port}")
logger.info(f"Available applications: {list(ALLOWED_APPS.keys())}") logger.info(f"Available applications: {list(ALLOWED_APPS.keys())}")
logger.info(f"Workout card URL: {WORKOUT_CARD_BASE_URL}/<date>.html")
try: try:
server.serve_forever() server.serve_forever()

View File

@@ -11,6 +11,7 @@ with lib;
./desktop ./desktop
./k3s-node ./k3s-node
./kodi ./kodi
./local-inference
./nfs-mounts ./nfs-mounts
./plasma-bigscreen ./plasma-bigscreen
./nvidia ./nvidia

View File

@@ -0,0 +1,124 @@
# NixOS role: local LLM inference via llama-swap fronting llama.cpp's
# llama-server. llama-swap exposes an OpenAI-compatible HTTP API and
# starts one llama-server process per requested model on demand.
{
  config,
  lib,
  pkgs,
  nixpkgs-unstable, # flake input: supplies the llama-swap module (see imports)
  ...
}:
with lib;
let
  cfg = config.roles.local-inference;
  # llama.cpp from the unstable package set, rebuilt with CUDA enabled.
  llama-cpp-cuda = pkgs.unstable.llama-cpp.override { cudaSupport = true; };
  llama-server = getExe' llama-cpp-cuda "llama-server";
in
{
  # Use the llama-swap module from nixpkgs-unstable and disable the stable
  # channel's copy so only one definition of services.llama-swap is active.
  imports = [ "${nixpkgs-unstable}/nixos/modules/services/networking/llama-swap.nix" ];
  disabledModules = [ "services/networking/llama-swap.nix" ];

  options.roles.local-inference = {
    enable = mkEnableOption "Enable local LLM inference via llama-swap + llama.cpp";

    # Models to serve, keyed by the name exposed on the API.
    models = mkOption {
      type = types.attrsOf (
        types.submodule {
          options = {
            hf-model = mkOption {
              type = types.str;
              description = "HuggingFace model shorthand (e.g. unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q8_K_XL)";
            };
            aliases = mkOption {
              type = types.listOf types.str;
              default = [ ];
              description = "Aliases for the model in the API";
            };
            n-gpu-layers = mkOption {
              type = types.int;
              default = 99;
              description = "Number of layers to offload to GPU";
            };
            cpu-moe = mkOption {
              type = types.bool;
              default = false;
              description = "Offload MoE expert layers to CPU";
            };
            extraArgs = mkOption {
              type = types.listOf types.str;
              default = [ ];
              description = "Extra arguments passed to llama-server";
            };
            ttl = mkOption {
              type = types.int;
              default = -1;
              description = "Seconds before unloading model (-1 = use global default, 0 = never unload)";
            };
          };
        }
      );
      default = { };
      description = "Models to serve from HuggingFace";
    };
    host = mkOption {
      type = types.str;
      default = "127.0.0.1";
      description = "IP address llama-swap listens on";
    };
    port = mkOption {
      type = types.port;
      default = 8080;
      description = "Port llama-swap listens on";
    };
    openFirewall = mkOption {
      type = types.bool;
      default = false;
      description = "Open the server port in the firewall";
    };
    healthCheckTimeout = mkOption {
      type = types.int;
      default = 600;
      description = "Seconds to wait for llama-server health check (model download can take a while)";
    };
    globalTTL = mkOption {
      type = types.int;
      default = 0;
      description = "Default TTL in seconds before unloading an idle model (0 = never unload)";
    };
  };

  config = mkIf cfg.enable {
    # Point the HuggingFace download cache and HOME at the systemd-managed
    # directories declared below (CacheDirectory/StateDirectory).
    systemd.services.llama-swap.environment = {
      LLAMA_CACHE = "/var/cache/llama-swap";
      HOME = "/var/lib/llama-swap";
    };
    systemd.services.llama-swap.serviceConfig = {
      CacheDirectory = "llama-swap";
      StateDirectory = "llama-swap";
    };

    services.llama-swap = {
      enable = true;
      listenAddress = cfg.host;
      port = cfg.port;
      openFirewall = cfg.openFirewall;
      settings = {
        healthCheckTimeout = cfg.healthCheckTimeout;
        globalTTL = cfg.globalTTL;
        models = mapAttrs (
          name: m:
          {
            # \${PORT} is escaped so Nix passes the literal ${PORT} macro
            # through to llama-swap, which substitutes its assigned port.
            cmd = "${llama-server} --port \${PORT} -hf ${m.hf-model} -ngl ${toString m.n-gpu-layers} --no-webui ${optionalString m.cpu-moe "--cpu-moe"} ${concatStringsSep " " m.extraArgs}";
            aliases = m.aliases;
          }
          # Per-model ttl is only emitted when explicitly set (!= -1),
          # otherwise globalTTL applies.
          // optionalAttrs (m.ttl != -1) { ttl = m.ttl; }
        ) cfg.models;
      };
    };
  };
}
}