Compare commits

...

5 Commits

Author SHA1 Message Date
5a82554884 feat(opencode): add oh-my-openagent plugin with omo config for ollama-cloud/glm-5.1
Some checks failed
CI / check (push) Failing after 2m8s
CI / build-and-cache (push) Has been skipped
Configure oh-my-openagent (omo) plugin for multi-agent orchestration
using ollama-cloud and local llama-swap providers. Primary model is
ollama-cloud/glm-5.1 with fallback chains. Add runtime fallback,
background task concurrency limits, and disable incompatible agents
(hephaestus, multimodal-looker).
2026-04-17 13:43:08 -07:00
170a27310e feat(local-inference): add TTL support for automatic model unloading
Some checks failed
CI / check (push) Failing after 1m44s
CI / build-and-cache (push) Has been skipped
Add globalTTL and per-model ttl options to llama-swap config,
allowing idle models to be automatically unloaded from memory.
2026-04-16 15:37:02 -07:00
bd377676ed fix(opencode): increase context/output limits for local model
Some checks failed
CI / check (push) Failing after 1m59s
CI / build-and-cache (push) Has been skipped
2026-04-16 15:20:51 -07:00
10efafd92e feat(local-inference): replace ollama with llama-swap + llama.cpp on zix790prors
- Add local-inference NixOS role using llama-swap (from nixpkgs-unstable)
  with llama.cpp (CUDA-enabled, from nixpkgs-unstable)
- Serves Qwen3.6-35B-A3B via HuggingFace auto-download with --cpu-moe
- Add nixosSpecialArgs for nixpkgs-unstable module access
- Configure opencode with llama-local provider pointing to zix790prors:8080
- Update gptel from Ollama backend to OpenAI-compatible llama-swap backend
- Remove ollama service from zix790prors
2026-04-16 15:20:37 -07:00
d16c8aa67e Merge pull request 'feat(app-launcher): workout card launcher + URL args' (#54) from ash/workout-card-launcher into main
All checks were successful
CI / check (push) Successful in 1m40s
CI / build-and-cache (push) Successful in 3h10m42s
Reviewed-on: #54
2026-04-13 17:13:41 -07:00
8 changed files with 324 additions and 11 deletions

View File

@@ -104,6 +104,11 @@
}; };
}; };
# Common specialArgs passed to all NixOS systems
nixosSpecialArgs = {
inherit nixpkgs-unstable;
};
# Shared unstable overlays for custom package builds # Shared unstable overlays for custom package builds
customUnstableOverlays = [ customUnstableOverlays = [
# Override claude-code in unstable to use our custom GCS-based build # Override claude-code in unstable to use our custom GCS-based build
@@ -149,6 +154,7 @@
in in
{ {
nixosConfigurations.nix-book = nixpkgs.lib.nixosSystem rec { nixosConfigurations.nix-book = nixpkgs.lib.nixosSystem rec {
specialArgs = nixosSpecialArgs;
system = "x86_64-linux"; system = "x86_64-linux";
modules = nixosModules ++ [ modules = nixosModules ++ [
./machines/nix-book/configuration.nix ./machines/nix-book/configuration.nix
@@ -166,6 +172,7 @@
}; };
nixosConfigurations.boxy = nixpkgs.lib.nixosSystem rec { nixosConfigurations.boxy = nixpkgs.lib.nixosSystem rec {
specialArgs = nixosSpecialArgs;
system = "x86_64-linux"; system = "x86_64-linux";
modules = nixosModules ++ [ modules = nixosModules ++ [
./machines/boxy/configuration.nix ./machines/boxy/configuration.nix
@@ -179,6 +186,7 @@
}; };
nixosConfigurations.gym-box = nixpkgs.lib.nixosSystem rec { nixosConfigurations.gym-box = nixpkgs.lib.nixosSystem rec {
specialArgs = nixosSpecialArgs;
system = "x86_64-linux"; system = "x86_64-linux";
modules = nixosModules ++ [ modules = nixosModules ++ [
./machines/gym-box/configuration.nix ./machines/gym-box/configuration.nix
@@ -191,6 +199,7 @@
}; };
nixosConfigurations.zix790prors = nixpkgs.lib.nixosSystem rec { nixosConfigurations.zix790prors = nixpkgs.lib.nixosSystem rec {
specialArgs = nixosSpecialArgs;
system = "x86_64-linux"; system = "x86_64-linux";
modules = nixosModules ++ [ modules = nixosModules ++ [
./machines/zix790prors/configuration.nix ./machines/zix790prors/configuration.nix
@@ -212,6 +221,7 @@
# Live USB ISO configuration # Live USB ISO configuration
nixosConfigurations.live-usb = nixpkgs.lib.nixosSystem rec { nixosConfigurations.live-usb = nixpkgs.lib.nixosSystem rec {
specialArgs = nixosSpecialArgs;
system = "x86_64-linux"; system = "x86_64-linux";
modules = nixosModules ++ [ modules = nixosModules ++ [
./machines/live-usb/configuration.nix ./machines/live-usb/configuration.nix
@@ -236,6 +246,7 @@
# ZFS/NFS server configuration # ZFS/NFS server configuration
nixosConfigurations.john-endesktop = nixpkgs.lib.nixosSystem rec { nixosConfigurations.john-endesktop = nixpkgs.lib.nixosSystem rec {
specialArgs = nixosSpecialArgs;
system = "x86_64-linux"; system = "x86_64-linux";
modules = nixosModules ++ [ modules = nixosModules ++ [
./machines/john-endesktop/configuration.nix ./machines/john-endesktop/configuration.nix
@@ -283,6 +294,7 @@
"custom-tea-rbw" = pkgs.custom.tea-rbw; "custom-tea-rbw" = pkgs.custom.tea-rbw;
"custom-rclone-torbox-setup" = pkgs.custom.rclone-torbox-setup; "custom-rclone-torbox-setup" = pkgs.custom.rclone-torbox-setup;
"custom-opencode" = pkgs.custom.opencode; "custom-opencode" = pkgs.custom.opencode;
"qt-pinned-jellyfin-media-player" = pkgsQt.jellyfin-media-player; "qt-pinned-jellyfin-media-player" = pkgsQt.jellyfin-media-player;
"qt-pinned-stremio" = pkgsQt.stremio; "qt-pinned-stremio" = pkgsQt.stremio;
} }

View File

@@ -99,6 +99,14 @@ in
}; };
}; };
xdg.configFile."opencode/opencode.json" = {
source = ./opencode-config.json;
};
xdg.configFile."opencode/oh-my-openagent.jsonc" = {
source = ./opencode-omo-config.jsonc;
};
# Note: modules must be imported at top-level home config # Note: modules must be imported at top-level home config
}; };
} }

View File

@@ -0,0 +1,24 @@
{
"$schema": "https://opencode.ai/config.json",
"plugin": ["oh-my-openagent"],
"provider": {
"llama-local": {
"name": "Llama.cpp (zix790prors RTX 4070 Ti)",
"npm": "@ai-sdk/openai-compatible",
"options": {
"baseURL": "http://zix790prors.oglehome:8080/v1"
},
"models": {
"Qwen3.6-35B-A3B": {
"name": "Qwen3.6-35B-A3B (UD-Q8_K_XL)",
"reasoning": true,
"tool_call": true,
"limit": {
"context": 131072,
"output": 32768
}
}
}
}
}
}

View File

@@ -0,0 +1,136 @@
// oh-my-openagent (omo) model routing for opencode.
// Primary model throughout is ollama-cloud/glm-5.1, with fallback chains
// through other ollama-cloud models and the local llama-swap provider
// (llama-local/Qwen3.6-35B-A3B).
{
  "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-opencode.schema.json",

  // Per-agent model assignment: each agent uses "model" first and walks
  // "fallback_models" in order when runtime_fallback (below) triggers.
  "agents": {
    "sisyphus": {
      "model": "ollama-cloud/glm-5.1",
      "fallback_models": [
        "ollama-cloud/kimi-k2.5",
        "llama-local/Qwen3.6-35B-A3B",
        "ollama-cloud/qwen3-coder-next"
      ]
    },
    "prometheus": {
      "model": "ollama-cloud/glm-5.1",
      "fallback_models": [
        "ollama-cloud/kimi-k2.5",
        "ollama-cloud/qwen3-coder-next"
      ]
    },
    "atlas": {
      "model": "ollama-cloud/glm-5.1",
      "fallback_models": [
        "ollama-cloud/gemma4:31b",
        "ollama-cloud/kimi-k2.5"
      ]
    },
    // Lighter-weight agents get the smaller gemma4 model first.
    "explore": {
      "model": "ollama-cloud/gemma4:31b",
      "fallback_models": [
        "ollama-cloud/ministral-3:14b",
        "llama-local/Qwen3.6-35B-A3B"
      ]
    },
    "librarian": {
      "model": "ollama-cloud/gemma4:31b",
      "fallback_models": [
        "ollama-cloud/ministral-3:14b"
      ]
    },
    "oracle": {
      "model": "ollama-cloud/qwen3-coder-next",
      "fallback_models": [
        "ollama-cloud/deepseek-v3.2",
        "ollama-cloud/glm-5.1"
      ]
    },
    // Disabled agents — incompatible with this provider setup
    // (see the commit introducing this file).
    "multimodal-looker": {
      "disable": true
    },
    "hephaestus": {
      "disable": true
    },
    "momus": {
      "model": "ollama-cloud/glm-5.1",
      "fallback_models": [
        "ollama-cloud/qwen3-coder-next"
      ]
    },
    "metis": {
      "model": "ollama-cloud/glm-5.1",
      "fallback_models": [
        "ollama-cloud/kimi-k2.5"
      ]
    }
  },

  // Task-category routing, same model/fallback_models shape as agents.
  "categories": {
    "quick": {
      "model": "ollama-cloud/gemma4:31b",
      "fallback_models": [
        "ollama-cloud/ministral-3:14b"
      ]
    },
    "unspecified-low": {
      "model": "ollama-cloud/glm-5.1",
      "fallback_models": [
        "ollama-cloud/kimi-k2.5",
        "llama-local/Qwen3.6-35B-A3B"
      ]
    },
    "unspecified-high": {
      "model": "ollama-cloud/glm-5.1",
      "fallback_models": [
        "ollama-cloud/kimi-k2.5",
        "ollama-cloud/qwen3-coder-next"
      ]
    },
    "deep": {
      "model": "ollama-cloud/qwen3-coder-next",
      "fallback_models": [
        "ollama-cloud/deepseek-v3.2",
        "ollama-cloud/glm-5.1"
      ]
    },
    "ultrabrain": {
      "model": "ollama-cloud/qwen3-coder-next",
      "fallback_models": [
        "ollama-cloud/deepseek-v3.2",
        "ollama-cloud/glm-5.1"
      ]
    },
    "writing": {
      "model": "ollama-cloud/glm-5.1",
      "fallback_models": [
        "ollama-cloud/kimi-k2.5"
      ]
    },
    "visual-engineering": {
      "model": "ollama-cloud/glm-5.1",
      "fallback_models": [
        "ollama-cloud/qwen3-coder-next"
      ]
    }
  },

  // Automatic failover: on the listed HTTP errors, retry with the next
  // fallback model (up to 3 attempts), cool the failed model down for 60s,
  // and surface a notification when a fallback occurs.
  "runtime_fallback": {
    "enabled": true,
    "retry_on_errors": [400, 429, 503, 529],
    "max_fallback_attempts": 3,
    "cooldown_seconds": 60,
    "notify_on_fallback": true
  },

  // Background-task concurrency caps per provider; the local llama-swap
  // server is held to 2 concurrent tasks.
  "background_task": {
    "defaultConcurrency": 5,
    "providerConcurrency": {
      "ollama-cloud": 10,
      "llama-local": 2
    }
  },

  "disabled_hooks": ["no-sisyphus-gpt"],

  // Prompt fed to the comment-checker hook (runtime string — do not edit
  // casually; "{{comments}}" is a placeholder it substitutes).
  "comment_checker": {
    "custom_prompt": "Check for AI-generated filler phrases, redundant obvious statements, and excessively verbose explanations. Comments should add value beyond what the code itself expresses. Flag: 'TODO' without ticket references, 'Note that...' when obvious, repeating the function name in the comment, and any form of 'simply' or 'simply just'. Use {{comments}} placeholder."
  },

  "tmux": { "enabled": false },

  "experimental": {
    "aggressive_truncation": true,
    "task_system": true
  }
}

View File

@@ -233,14 +233,15 @@ rbw is unavailable or the entry is not found."
gptel-use-tools t gptel-use-tools t
gptel-confirm-tool-calls 'always gptel-confirm-tool-calls 'always
gptel-include-reasoning 'ignore gptel-include-reasoning 'ignore
gptel-model "qwen3:30b") gptel-model "Qwen3.6-35B-A3B")
;; Set default backend to be Ollama-Local ;; Set default backend to llama-swap (OpenAI-compatible)
(setq! gptel-backend (setq! gptel-backend
(gptel-make-ollama "Ollama-Local" (gptel-make-openai "llama-swap"
:host "localhost:11434" :host "localhost:8080"
:endpoint "/v1/chat/completions"
:stream t :stream t
:models '(deepseek-r1 deepseek-r1-fullctx qwen3:30b qwen3:4b llama3.1 qwen2.5-coder mistral-nemo gpt-oss))) :models '("Qwen3.6-35B-A3B")))
;; Define custom tools ;; Define custom tools
(gptel-make-tool (gptel-make-tool

View File

@@ -26,6 +26,19 @@ with lib;
x11 = true; x11 = true;
}; };
kodi.enable = true; kodi.enable = true;
local-inference = {
enable = true;
host = "zix790prors.oglehome";
openFirewall = true;
globalTTL = 900;
models = {
"Qwen3.6-35B-A3B" = {
hf-model = "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q8_K_XL";
aliases = [ "Qwen3.6-35B-A3B" ];
cpu-moe = true;
};
};
};
nfs-mounts.enable = true; nfs-mounts.enable = true;
nvidia = { nvidia = {
enable = true; enable = true;
@@ -56,12 +69,6 @@ with lib;
${pkgs.xorg.xrandr}/bin/xrandr --output DP-0 --mode 3440x1440 --rate 164.90 --primary ${pkgs.xorg.xrandr}/bin/xrandr --output DP-0 --mode 3440x1440 --rate 164.90 --primary
''; '';
services.ollama = {
enable = true;
acceleration = "cuda";
loadModels = [ "gpt-oss" "deepseek-r1" "qwen3:30b" ];
};
# This option defines the first version of NixOS you have installed on this particular machine, # This option defines the first version of NixOS you have installed on this particular machine,
# and is used to maintain compatibility with application data (e.g. databases) created on older NixOS versions. # and is used to maintain compatibility with application data (e.g. databases) created on older NixOS versions.
# #

View File

@@ -11,6 +11,7 @@ with lib;
./desktop ./desktop
./k3s-node ./k3s-node
./kodi ./kodi
./local-inference
./nfs-mounts ./nfs-mounts
./plasma-bigscreen ./plasma-bigscreen
./nvidia ./nvidia

View File

@@ -0,0 +1,124 @@
# NixOS role: local LLM inference.
# Runs llama-swap (an OpenAI-compatible model router) which launches
# llama.cpp's llama-server per configured model. Both the llama-swap module
# and the llama-cpp package come from nixpkgs-unstable, which is passed in
# via specialArgs; the stable module of the same path is disabled below so
# the two option declarations cannot collide.
{
  config,
  lib,
  pkgs,
  nixpkgs-unstable,
  ...
}:
with lib;
let
  cfg = config.roles.local-inference;
  # CUDA-enabled build of llama.cpp from the unstable package set.
  llama-cpp-cuda = pkgs.unstable.llama-cpp.override { cudaSupport = true; };
  llama-server = getExe' llama-cpp-cuda "llama-server";
in
{
  # Import the llama-swap module from nixpkgs-unstable and disable the
  # stable-channel module at the same path.
  imports = [ "${nixpkgs-unstable}/nixos/modules/services/networking/llama-swap.nix" ];
  disabledModules = [ "services/networking/llama-swap.nix" ];

  options.roles.local-inference = {
    enable = mkEnableOption "Enable local LLM inference via llama-swap + llama.cpp";

    # Attribute set of models to serve; each attribute name becomes the
    # model's id in the llama-swap config.
    models = mkOption {
      type = types.attrsOf (
        types.submodule {
          options = {
            hf-model = mkOption {
              type = types.str;
              description = "HuggingFace model shorthand (e.g. unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q8_K_XL)";
            };
            aliases = mkOption {
              type = types.listOf types.str;
              default = [ ];
              description = "Aliases for the model in the API";
            };
            n-gpu-layers = mkOption {
              type = types.int;
              default = 99;
              description = "Number of layers to offload to GPU";
            };
            cpu-moe = mkOption {
              type = types.bool;
              default = false;
              description = "Offload MoE expert layers to CPU";
            };
            extraArgs = mkOption {
              type = types.listOf types.str;
              default = [ ];
              description = "Extra arguments passed to llama-server";
            };
            ttl = mkOption {
              type = types.int;
              default = -1;
              description = "Seconds before unloading model (-1 = use global default, 0 = never unload)";
            };
          };
        }
      );
      default = { };
      description = "Models to serve from HuggingFace";
    };
    host = mkOption {
      type = types.str;
      default = "127.0.0.1";
      description = "IP address llama-swap listens on";
    };
    port = mkOption {
      type = types.port;
      default = 8080;
      description = "Port llama-swap listens on";
    };
    openFirewall = mkOption {
      type = types.bool;
      default = false;
      description = "Open the server port in the firewall";
    };
    healthCheckTimeout = mkOption {
      type = types.int;
      default = 600;
      description = "Seconds to wait for llama-server health check (model download can take a while)";
    };
    globalTTL = mkOption {
      type = types.int;
      default = 0;
      description = "Default TTL in seconds before unloading an idle model (0 = never unload)";
    };
  };

  config = mkIf cfg.enable {
    # Give the service writable cache/state dirs and point the HF download
    # cache (LLAMA_CACHE) and HOME at them.
    systemd.services.llama-swap.environment = {
      LLAMA_CACHE = "/var/cache/llama-swap";
      HOME = "/var/lib/llama-swap";
    };
    systemd.services.llama-swap.serviceConfig = {
      CacheDirectory = "llama-swap";
      StateDirectory = "llama-swap";
    };

    services.llama-swap = {
      enable = true;
      listenAddress = cfg.host;
      port = cfg.port;
      openFirewall = cfg.openFirewall;
      settings = {
        healthCheckTimeout = cfg.healthCheckTimeout;
        globalTTL = cfg.globalTTL;
        # One llama-server command per configured model. `\${PORT}` is
        # escaped so llama-swap — not Nix — substitutes the runtime port.
        models = mapAttrs (
          name: m:
          {
            cmd = "${llama-server} --port \${PORT} -hf ${m.hf-model} -ngl ${toString m.n-gpu-layers} --no-webui ${optionalString m.cpu-moe "--cpu-moe"} ${concatStringsSep " " m.extraArgs}";
            aliases = m.aliases;
          }
          # Emit a per-model ttl only when it overrides the -1 sentinel
          # (i.e. the global default).
          // optionalAttrs (m.ttl != -1) { ttl = m.ttl; }
        ) cfg.models;
      };
    };
  };
}