feat(local-inference): add TTL support for automatic model unloading
Add globalTTL and per-model ttl options to llama-swap config, allowing idle models to be automatically unloaded from memory.
This commit is contained in:
@@ -30,6 +30,7 @@ with lib;
     enable = true;
     host = "zix790prors.oglehome";
     openFirewall = true;
+    globalTTL = 900;
     models = {
       "Qwen3.6-35B-A3B" = {
         hf-model = "unsloth/Qwen3.6-35B-A3B-GGUF:UD-Q8_K_XL";
Reference in New Issue
Block a user