feat(local-inference): replace ollama with llama-swap + llama.cpp on zix790prors
- Add local-inference NixOS role using llama-swap (from nixpkgs-unstable) with llama.cpp (CUDA-enabled, from nixpkgs-unstable)
- Serves Qwen3.6-35B-A3B via HuggingFace auto-download with --cpu-moe
- Add nixosSpecialArgs for nixpkgs-unstable module access
- Configure opencode with llama-local provider pointing to zix790prors:8080
- Update gptel from Ollama backend to OpenAI-compatible llama-swap backend
- Remove ollama service from zix790prors
This commit is contained in:
@@ -26,6 +26,18 @@ with lib;
|
||||
x11 = true;
|
||||
};
|
||||
kodi.enable = true;
|
||||
local-inference = {
|
||||
enable = true;
|
||||
host = "zix790prors.oglehome";
|
||||
openFirewall = true;
|
||||
models = {
|
||||
"Qwen3.6-35B-A3B" = {
|
||||
hf-model = "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q8_K_XL";
|
||||
aliases = [ "Qwen3.6-35B-A3B" ];
|
||||
cpu-moe = true;
|
||||
};
|
||||
};
|
||||
};
|
||||
nfs-mounts.enable = true;
|
||||
nvidia = {
|
||||
enable = true;
|
||||
@@ -56,12 +68,6 @@ with lib;
|
||||
${pkgs.xorg.xrandr}/bin/xrandr --output DP-0 --mode 3440x1440 --rate 164.90 --primary
|
||||
'';
|
||||
|
||||
services.ollama = {
|
||||
enable = true;
|
||||
acceleration = "cuda";
|
||||
loadModels = [ "gpt-oss" "deepseek-r1" "qwen3:30b" ];
|
||||
};
|
||||
|
||||
# This option defines the first version of NixOS you have installed on this particular machine,
|
||||
# and is used to maintain compatibility with application data (e.g. databases) created on older NixOS versions.
|
||||
#
|
||||
|
||||
Reference in New Issue
Block a user