feat(local-inference): replace ollama with llama-swap + llama.cpp on zix790prors
- Add local-inference NixOS role using llama-swap (from nixpkgs-unstable) with llama.cpp (CUDA-enabled, from nixpkgs-unstable)
- Serve Qwen3.6-35B-A3B via HuggingFace auto-download with --cpu-moe
- Add nixosSpecialArgs for nixpkgs-unstable module access
- Configure opencode with llama-local provider pointing to zix790prors:8080
- Update gptel from Ollama backend to OpenAI-compatible llama-swap backend
- Remove ollama service from zix790prors
This commit is contained in:
@@ -99,6 +99,10 @@ in
   };
 };

 xdg.configFile."opencode/opencode.json" = {
   source = ./opencode-config.json;
 };

 # Note: modules must be imported at top-level home config
 };
}
||||
23  home/roles/base/opencode-config.json  (new file)
@@ -0,0 +1,23 @@
{
  "$schema": "https://opencode.ai/config.json",
  "provider": {
    "llama-local": {
      "name": "Llama.cpp (zix790prors RTX 4070 Ti)",
      "npm": "@ai-sdk/openai-compatible",
      "options": {
        "baseURL": "http://zix790prors.oglehome:8080/v1"
      },
      "models": {
        "Qwen3.6-35B-A3B": {
          "name": "Qwen3.6-35B-A3B (UD-Q8_K_XL)",
          "reasoning": true,
          "tool_call": true,
          "limit": {
            "context": 32768,
            "output": 8192
          }
        }
      }
    }
  }
}
@@ -233,14 +233,15 @@ rbw is unavailable or the entry is not found."
  gptel-use-tools t
  gptel-confirm-tool-calls 'always
  gptel-include-reasoning 'ignore
- gptel-model "qwen3:30b")
+ gptel-model "Qwen3.6-35B-A3B")

-;; Set default backend to be Ollama-Local
+;; Set default backend to llama-swap (OpenAI-compatible)
 (setq! gptel-backend
-  (gptel-make-ollama "Ollama-Local"
-    :host "localhost:11434"
+  (gptel-make-openai "llama-swap"
+    :host "localhost:8080"
+    :endpoint "/v1/chat/completions"
     :stream t
-    :models '(deepseek-r1 deepseek-r1-fullctx qwen3:30b qwen3:4b llama3.1 qwen2.5-coder mistral-nemo gpt-oss)))
+    :models '("Qwen3.6-35B-A3B")))

;; Define custom tools
(gptel-make-tool
||||
Reference in New Issue
Block a user