feat(local-inference): add TTL support for automatic model unloading
Add globalTTL and per-model ttl options to llama-swap config, allowing idle models to be automatically unloaded from memory.
This commit is contained in:
@@ -30,6 +30,7 @@ with lib;
     enable = true;
     host = "zix790prors.oglehome";
     openFirewall = true;
+    globalTTL = 900;
     models = {
       "Qwen3.6-35B-A3B" = {
         hf-model = "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q8_K_XL";
Reference in New Issue
Block a user