diff --git a/flake.nix b/flake.nix index c424c9f..dd0dea4 100644 --- a/flake.nix +++ b/flake.nix @@ -104,6 +104,11 @@ }; }; + # Common specialArgs passed to all NixOS systems + nixosSpecialArgs = { + inherit nixpkgs-unstable; + }; + # Shared unstable overlays for custom package builds customUnstableOverlays = [ # Override claude-code in unstable to use our custom GCS-based build @@ -149,6 +154,7 @@ in { nixosConfigurations.nix-book = nixpkgs.lib.nixosSystem rec { + specialArgs = nixosSpecialArgs; system = "x86_64-linux"; modules = nixosModules ++ [ ./machines/nix-book/configuration.nix @@ -166,6 +172,7 @@ }; nixosConfigurations.boxy = nixpkgs.lib.nixosSystem rec { + specialArgs = nixosSpecialArgs; system = "x86_64-linux"; modules = nixosModules ++ [ ./machines/boxy/configuration.nix @@ -179,6 +186,7 @@ }; nixosConfigurations.gym-box = nixpkgs.lib.nixosSystem rec { + specialArgs = nixosSpecialArgs; system = "x86_64-linux"; modules = nixosModules ++ [ ./machines/gym-box/configuration.nix @@ -191,6 +199,7 @@ }; nixosConfigurations.zix790prors = nixpkgs.lib.nixosSystem rec { + specialArgs = nixosSpecialArgs; system = "x86_64-linux"; modules = nixosModules ++ [ ./machines/zix790prors/configuration.nix @@ -212,6 +221,7 @@ # Live USB ISO configuration nixosConfigurations.live-usb = nixpkgs.lib.nixosSystem rec { + specialArgs = nixosSpecialArgs; system = "x86_64-linux"; modules = nixosModules ++ [ ./machines/live-usb/configuration.nix @@ -236,6 +246,7 @@ # ZFS/NFS server configuration nixosConfigurations.john-endesktop = nixpkgs.lib.nixosSystem rec { + specialArgs = nixosSpecialArgs; system = "x86_64-linux"; modules = nixosModules ++ [ ./machines/john-endesktop/configuration.nix diff --git a/home/roles/base/default.nix b/home/roles/base/default.nix index 99e7b48..3529980 100644 --- a/home/roles/base/default.nix +++ b/home/roles/base/default.nix @@ -99,6 +99,10 @@ in }; }; + xdg.configFile."opencode/opencode.json" = { + source = ./opencode-config.json; + }; + # Note: 
modules must be imported at top-level home config }; } diff --git a/home/roles/base/opencode-config.json b/home/roles/base/opencode-config.json new file mode 100644 index 0000000..369cb1d --- /dev/null +++ b/home/roles/base/opencode-config.json @@ -0,0 +1,23 @@ +{ + "$schema": "https://opencode.ai/config.json", + "provider": { + "llama-local": { + "name": "Llama.cpp (zix790prors RTX 4070 Ti)", + "npm": "@ai-sdk/openai-compatible", + "options": { + "baseURL": "http://zix790prors.oglehome:8080/v1" + }, + "models": { + "Qwen3.6-35B-A3B": { + "name": "Qwen3.6-35B-A3B (UD-Q8_K_XL)", + "reasoning": true, + "tool_call": true, + "limit": { + "context": 32768, + "output": 8192 + } + } + } + } + } +} \ No newline at end of file diff --git a/home/roles/emacs/doom/config.el b/home/roles/emacs/doom/config.el index 3fb13ba..184a43a 100644 --- a/home/roles/emacs/doom/config.el +++ b/home/roles/emacs/doom/config.el @@ -233,14 +233,15 @@ rbw is unavailable or the entry is not found." gptel-use-tools t gptel-confirm-tool-calls 'always gptel-include-reasoning 'ignore - gptel-model "qwen3:30b") + gptel-model "Qwen3.6-35B-A3B") - ;; Set default backend to be Ollama-Local + ;; Set default backend to llama-swap (OpenAI-compatible) (setq! 
gptel-backend - (gptel-make-ollama "Ollama-Local" - :host "localhost:11434" + (gptel-make-openai "llama-swap" + :host "localhost:8080" + :endpoint "/v1/chat/completions" :protocol "http" :stream t - :models '(deepseek-r1 deepseek-r1-fullctx qwen3:30b qwen3:4b llama3.1 qwen2.5-coder mistral-nemo gpt-oss))) + :models '("Qwen3.6-35B-A3B"))) ;; Define custom tools (gptel-make-tool diff --git a/machines/zix790prors/configuration.nix b/machines/zix790prors/configuration.nix index e223780..f9b9ca6 100644 --- a/machines/zix790prors/configuration.nix +++ b/machines/zix790prors/configuration.nix @@ -26,6 +26,18 @@ with lib; x11 = true; }; kodi.enable = true; + local-inference = { + enable = true; + host = "zix790prors.oglehome"; + openFirewall = true; + models = { + "Qwen3.6-35B-A3B" = { + hf-model = "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q8_K_XL"; + aliases = [ "Qwen3.6-35B-A3B" ]; + cpu-moe = true; + }; + }; + }; nfs-mounts.enable = true; nvidia = { enable = true; @@ -56,12 +68,6 @@ with lib; ${pkgs.xorg.xrandr}/bin/xrandr --output DP-0 --mode 3440x1440 --rate 164.90 --primary ''; - services.ollama = { - enable = true; - acceleration = "cuda"; - loadModels = [ "gpt-oss" "deepseek-r1" "qwen3:30b" ]; - }; - # This option defines the first version of NixOS you have installed on this particular machine, # and is used to maintain compatibility with application data (e.g. databases) created on older NixOS versions. # diff --git a/roles/default.nix b/roles/default.nix index 2d78600..a7937a5 100644 --- a/roles/default.nix +++ b/roles/default.nix @@ -11,6 +11,7 @@ with lib; ./desktop ./k3s-node ./kodi + ./local-inference ./nfs-mounts ./plasma-bigscreen ./nvidia diff --git a/roles/local-inference/default.nix b/roles/local-inference/default.nix new file mode 100644 index 0000000..a55e94c --- /dev/null +++ b/roles/local-inference/default.nix @@ -0,0 +1,108 @@ +{ + config, + lib, + pkgs, + nixpkgs-unstable, + ... 
+}: + +with lib; + +let + cfg = config.roles.local-inference; + llama-cpp-cuda = pkgs.unstable.llama-cpp.override { cudaSupport = true; }; + llama-server = getExe' llama-cpp-cuda "llama-server"; +in +{ + imports = [ "${nixpkgs-unstable}/nixos/modules/services/networking/llama-swap.nix" ]; + disabledModules = [ "services/networking/llama-swap.nix" ]; + + options.roles.local-inference = { + enable = mkEnableOption "local LLM inference via llama-swap + llama.cpp"; + + models = mkOption { + type = types.attrsOf ( + types.submodule { + options = { + hf-model = mkOption { + type = types.str; + description = "HuggingFace model shorthand (e.g. unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q8_K_XL)"; + }; + aliases = mkOption { + type = types.listOf types.str; + default = [ ]; + description = "Aliases for the model in the API"; + }; + n-gpu-layers = mkOption { + type = types.int; + default = 99; + description = "Number of layers to offload to GPU"; + }; + cpu-moe = mkOption { + type = types.bool; + default = false; + description = "Offload MoE expert layers to CPU"; + }; + extraArgs = mkOption { + type = types.listOf types.str; + default = [ ]; + description = "Extra arguments passed to llama-server"; + }; + }; + } + ); + default = { }; + description = "Models to serve from HuggingFace"; + }; + + host = mkOption { + type = types.str; + default = "127.0.0.1"; + description = "Host name or IP address llama-swap listens on"; + }; + + port = mkOption { + type = types.port; + default = 8080; + description = "Port llama-swap listens on"; + }; + + openFirewall = mkOption { + type = types.bool; + default = false; + description = "Open the server port in the firewall"; + }; + + healthCheckTimeout = mkOption { + type = types.int; + default = 600; + description = "Seconds to wait for llama-server health check (model download can take a while)"; + }; + }; + + config = mkIf cfg.enable { + systemd.services.llama-swap.environment = { + LLAMA_CACHE = "/var/cache/llama-swap"; + HOME = "/var/lib/llama-swap"; + 
}; + + systemd.services.llama-swap.serviceConfig = { + CacheDirectory = "llama-swap"; + StateDirectory = "llama-swap"; + }; + + services.llama-swap = { + enable = true; + listenAddress = cfg.host; + port = cfg.port; + openFirewall = cfg.openFirewall; + settings = { + healthCheckTimeout = cfg.healthCheckTimeout; + models = mapAttrs (_: m: { + cmd = "${llama-server} --port \${PORT} -hf ${m.hf-model} -ngl ${toString m.n-gpu-layers} --no-webui ${optionalString m.cpu-moe "--cpu-moe"} ${concatStringsSep " " m.extraArgs}"; + aliases = m.aliases; + }) cfg.models; + }; + }; + }; +}