feat(local-inference): replace ollama with llama-swap + llama.cpp on zix790prors

- Add local-inference NixOS role using llama-swap (from nixpkgs-unstable)
  with llama.cpp (CUDA-enabled, from nixpkgs-unstable)
- Serves Qwen3.6-35B-A3B via Hugging Face auto-download with --cpu-moe
- Add nixosSpecialArgs for nixpkgs-unstable module access
- Configure opencode with llama-local provider pointing to zix790prors:8080
- Update gptel from Ollama backend to OpenAI-compatible llama-swap backend
- Remove ollama service from zix790prors
This commit is contained in:
2026-04-16 15:20:37 -07:00
parent d16c8aa67e
commit 10efafd92e
7 changed files with 165 additions and 11 deletions
+6 -5
View File
@@ -233,14 +233,15 @@ rbw is unavailable or the entry is not found."
gptel-use-tools t
gptel-confirm-tool-calls 'always
gptel-include-reasoning 'ignore
gptel-model "qwen3:30b")
gptel-model "Qwen3.6-35B-A3B")
;; Set default backend to be Ollama-Local
;; Set default backend to llama-swap (OpenAI-compatible)
(setq! gptel-backend
(gptel-make-ollama "Ollama-Local"
:host "localhost:11434"
(gptel-make-openai "llama-swap"
:host "localhost:8080"
:endpoint "/v1/chat/completions"
:stream t
:models '(deepseek-r1 deepseek-r1-fullctx qwen3:30b qwen3:4b llama3.1 qwen2.5-coder mistral-nemo gpt-oss)))
:models '("Qwen3.6-35B-A3B")))
;; Define custom tools
(gptel-make-tool