diff --git a/flake.nix b/flake.nix index c424c9f..dd0dea4 100644 --- a/flake.nix +++ b/flake.nix @@ -104,6 +104,11 @@ }; }; + # Common specialArgs passed to all NixOS systems + nixosSpecialArgs = { + inherit nixpkgs-unstable; + }; + # Shared unstable overlays for custom package builds customUnstableOverlays = [ # Override claude-code in unstable to use our custom GCS-based build @@ -149,6 +154,7 @@ in { nixosConfigurations.nix-book = nixpkgs.lib.nixosSystem rec { + specialArgs = nixosSpecialArgs; system = "x86_64-linux"; modules = nixosModules ++ [ ./machines/nix-book/configuration.nix @@ -166,6 +172,7 @@ }; nixosConfigurations.boxy = nixpkgs.lib.nixosSystem rec { + specialArgs = nixosSpecialArgs; system = "x86_64-linux"; modules = nixosModules ++ [ ./machines/boxy/configuration.nix @@ -179,6 +186,7 @@ }; nixosConfigurations.gym-box = nixpkgs.lib.nixosSystem rec { + specialArgs = nixosSpecialArgs; system = "x86_64-linux"; modules = nixosModules ++ [ ./machines/gym-box/configuration.nix @@ -191,6 +199,7 @@ }; nixosConfigurations.zix790prors = nixpkgs.lib.nixosSystem rec { + specialArgs = nixosSpecialArgs; system = "x86_64-linux"; modules = nixosModules ++ [ ./machines/zix790prors/configuration.nix @@ -212,6 +221,7 @@ # Live USB ISO configuration nixosConfigurations.live-usb = nixpkgs.lib.nixosSystem rec { + specialArgs = nixosSpecialArgs; system = "x86_64-linux"; modules = nixosModules ++ [ ./machines/live-usb/configuration.nix @@ -236,6 +246,7 @@ # ZFS/NFS server configuration nixosConfigurations.john-endesktop = nixpkgs.lib.nixosSystem rec { + specialArgs = nixosSpecialArgs; system = "x86_64-linux"; modules = nixosModules ++ [ ./machines/john-endesktop/configuration.nix diff --git a/home/roles/base/default.nix b/home/roles/base/default.nix index 99e7b48..3529980 100644 --- a/home/roles/base/default.nix +++ b/home/roles/base/default.nix @@ -99,6 +99,10 @@ in }; }; + xdg.configFile."opencode/opencode.json" = { + source = ./opencode-config.json; + }; + # Note: 
modules must be imported at top-level home config }; } diff --git a/home/roles/base/opencode-config.json b/home/roles/base/opencode-config.json new file mode 100644 index 0000000..369cb1d --- /dev/null +++ b/home/roles/base/opencode-config.json @@ -0,0 +1,23 @@ +{ + "$schema": "https://opencode.ai/config.json", + "provider": { + "llama-local": { + "name": "Llama.cpp (zix790prors RTX 4070 Ti)", + "npm": "@ai-sdk/openai-compatible", + "options": { + "baseURL": "http://zix790prors.oglehome:8080/v1" + }, + "models": { + "Qwen3.6-35B-A3B": { + "name": "Qwen3.6-35B-A3B (UD-Q8_K_XL)", + "reasoning": true, + "tool_call": true, + "limit": { + "context": 32768, + "output": 8192 + } + } + } + } + } +} \ No newline at end of file diff --git a/home/roles/emacs/doom/config.el b/home/roles/emacs/doom/config.el index 3fb13ba..184a43a 100644 --- a/home/roles/emacs/doom/config.el +++ b/home/roles/emacs/doom/config.el @@ -233,14 +233,15 @@ rbw is unavailable or the entry is not found." gptel-use-tools t gptel-confirm-tool-calls 'always gptel-include-reasoning 'ignore - gptel-model "qwen3:30b") + gptel-model "Qwen3.6-35B-A3B") - ;; Set default backend to be Ollama-Local + ;; Set default backend to llama-swap (OpenAI-compatible) (setq! 
gptel-backend - (gptel-make-ollama "Ollama-Local" - :host "localhost:11434" + (gptel-make-openai "llama-swap" + :host "localhost:8080" + :endpoint "/v1/chat/completions" :protocol "http" :stream t - :models '(deepseek-r1 deepseek-r1-fullctx qwen3:30b qwen3:4b llama3.1 qwen2.5-coder mistral-nemo gpt-oss))) + :models '("Qwen3.6-35B-A3B"))) ;; Define custom tools (gptel-make-tool diff --git a/machines/zix790prors/configuration.nix b/machines/zix790prors/configuration.nix index e223780..f9b9ca6 100644 --- a/machines/zix790prors/configuration.nix +++ b/machines/zix790prors/configuration.nix @@ -26,6 +26,18 @@ with lib; x11 = true; }; kodi.enable = true; + local-inference = { + enable = true; + host = "zix790prors.oglehome"; + openFirewall = true; + models = { + "Qwen3.6-35B-A3B" = { + hf-model = "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q8_K_XL"; + aliases = [ "Qwen3.6-35B-A3B" ]; + cpu-moe = true; + }; + }; + }; nfs-mounts.enable = true; nvidia = { enable = true; @@ -56,12 +68,6 @@ with lib; ${pkgs.xorg.xrandr}/bin/xrandr --output DP-0 --mode 3440x1440 --rate 164.90 --primary ''; - services.ollama = { - enable = true; - acceleration = "cuda"; - loadModels = [ "gpt-oss" "deepseek-r1" "qwen3:30b" ]; - }; - # This option defines the first version of NixOS you have installed on this particular machine, # and is used to maintain compatibility with application data (e.g. databases) created on older NixOS versions. # diff --git a/roles/default.nix b/roles/default.nix index 2d78600..a7937a5 100644 --- a/roles/default.nix +++ b/roles/default.nix @@ -11,6 +11,7 @@ with lib; ./desktop ./k3s-node ./kodi + ./local-inference ./nfs-mounts ./plasma-bigscreen ./nvidia diff --git a/roles/local-inference/default.nix b/roles/local-inference/default.nix new file mode 100644 index 0000000..a55e94c --- /dev/null +++ b/roles/local-inference/default.nix @@ -0,0 +1,108 @@ +{ + config, + lib, + pkgs, + nixpkgs-unstable, + ... 
+}: + +with lib; + +let + cfg = config.roles.local-inference; + llama-cpp-cuda = pkgs.unstable.llama-cpp.override { cudaSupport = true; }; + llama-server = getExe' llama-cpp-cuda "llama-server"; +in +{ + imports = [ "${nixpkgs-unstable}/nixos/modules/services/networking/llama-swap.nix" ]; + disabledModules = [ "services/networking/llama-swap.nix" ]; + + options.roles.local-inference = { + enable = mkEnableOption "local LLM inference via llama-swap + llama.cpp"; + + models = mkOption { + type = types.attrsOf ( + types.submodule { + options = { + hf-model = mkOption { + type = types.str; + description = "HuggingFace model shorthand (e.g. unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q8_K_XL)"; + }; + aliases = mkOption { + type = types.listOf types.str; + default = [ ]; + description = "Aliases for the model in the API"; + }; + n-gpu-layers = mkOption { + type = types.int; + default = 99; + description = "Number of layers to offload to GPU"; + }; + cpu-moe = mkOption { + type = types.bool; + default = false; + description = "Offload MoE expert layers to CPU"; + }; + extraArgs = mkOption { + type = types.listOf types.str; + default = [ ]; + description = "Extra arguments passed to llama-server"; + }; + }; + } + ); + default = { }; + description = "Models to serve from HuggingFace"; + }; + + host = mkOption { + type = types.str; + default = "127.0.0.1"; + description = "Host name or IP address llama-swap listens on"; + }; + + port = mkOption { + type = types.port; + default = 8080; + description = "Port llama-swap listens on"; + }; + + openFirewall = mkOption { + type = types.bool; + default = false; + description = "Open the server port in the firewall"; + }; + + healthCheckTimeout = mkOption { + type = types.int; + default = 600; + description = "Seconds to wait for llama-server health check (model download can take a while)"; + }; + }; + + config = mkIf cfg.enable { + systemd.services.llama-swap.environment = { + LLAMA_CACHE = "/var/cache/llama-swap"; + HOME = "/var/lib/llama-swap"; + 
}; + + systemd.services.llama-swap.serviceConfig = { + CacheDirectory = "llama-swap"; + StateDirectory = "llama-swap"; + }; + + services.llama-swap = { + enable = true; + listenAddress = cfg.host; + port = cfg.port; + openFirewall = cfg.openFirewall; + settings = { + healthCheckTimeout = cfg.healthCheckTimeout; + models = mapAttrs (_: m: { + cmd = "${llama-server} --port \${PORT} -hf ${m.hf-model} -ngl ${toString m.n-gpu-layers} --no-webui ${optionalString m.cpu-moe "--cpu-moe"} ${concatStringsSep " " m.extraArgs}"; + aliases = m.aliases; + }) cfg.models; + }; + }; + }; +}