Add resource limits to daemon (bd-152)

Implemented connection limiting, request timeouts, and memory pressure detection:

- Connection limiting with semaphore pattern (default 100 max connections)
- Request timeout enforcement on read/write (default 30s)
- Memory pressure detection with aggressive cache eviction (default 500MB threshold)
- Configurable via environment variables:
  - BEADS_DAEMON_MAX_CONNS
  - BEADS_DAEMON_REQUEST_TIMEOUT
  - BEADS_DAEMON_MEMORY_THRESHOLD_MB
- Health endpoint now exposes active/max connections and memory usage
- Comprehensive test coverage for all limits

This prevents resource exhaustion under heavy load or attack scenarios.

Amp-Thread-ID: https://ampcode.com/threads/T-44d1817a-3709-4f1d-a27a-78bb2fa4d3dc
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Steve Yegge
2025-10-19 13:22:23 -07:00
parent 73cc958ecd
commit c71da4c267
4 changed files with 464 additions and 10 deletions

View File

@@ -9,6 +9,7 @@ import (
"os"
"os/signal"
"path/filepath"
"runtime"
"sort"
"strings"
"sync"
@@ -52,6 +53,12 @@ type Server struct {
startTime time.Time
cacheHits int64
cacheMisses int64
// Connection limiting
maxConns int
activeConns int32 // atomic counter
connSemaphore chan struct{}
// Request timeout
requestTimeout time.Duration
}
// NewServer creates a new RPC server
@@ -73,14 +80,32 @@ func NewServer(socketPath string, store storage.Storage) *Server {
}
}
maxConns := 100 // default
if env := os.Getenv("BEADS_DAEMON_MAX_CONNS"); env != "" {
var conns int
if _, err := fmt.Sscanf(env, "%d", &conns); err == nil && conns > 0 {
maxConns = conns
}
}
requestTimeout := 30 * time.Second // default
if env := os.Getenv("BEADS_DAEMON_REQUEST_TIMEOUT"); env != "" {
if timeout, err := time.ParseDuration(env); err == nil && timeout > 0 {
requestTimeout = timeout
}
}
return &Server{
socketPath: socketPath,
storage: store,
storageCache: make(map[string]*StorageCacheEntry),
maxCacheSize: maxCacheSize,
cacheTTL: cacheTTL,
shutdownChan: make(chan struct{}),
startTime: time.Now(),
socketPath: socketPath,
storage: store,
storageCache: make(map[string]*StorageCacheEntry),
maxCacheSize: maxCacheSize,
cacheTTL: cacheTTL,
shutdownChan: make(chan struct{}),
startTime: time.Now(),
maxConns: maxConns,
connSemaphore: make(chan struct{}, maxConns),
requestTimeout: requestTimeout,
}
}
@@ -131,7 +156,20 @@ func (s *Server) Start(ctx context.Context) error {
return fmt.Errorf("failed to accept connection: %w", err)
}
go s.handleConnection(conn)
// Try to acquire connection slot (non-blocking)
select {
case s.connSemaphore <- struct{}{}:
// Acquired slot, handle connection
go func(c net.Conn) {
defer func() { <-s.connSemaphore }() // Release slot
atomic.AddInt32(&s.activeConns, 1)
defer atomic.AddInt32(&s.activeConns, -1)
s.handleConnection(c)
}(conn)
default:
// Max connections reached, reject immediately
conn.Close()
}
}
}
@@ -218,7 +256,7 @@ func (s *Server) handleSignals() {
s.Stop()
}
// runCleanupLoop periodically evicts stale storage connections
// runCleanupLoop periodically evicts stale storage connections and checks memory pressure
func (s *Server) runCleanupLoop() {
s.cleanupTicker = time.NewTicker(5 * time.Minute)
defer s.cleanupTicker.Stop()
@@ -226,6 +264,7 @@ func (s *Server) runCleanupLoop() {
for {
select {
case <-s.cleanupTicker.C:
s.checkMemoryPressure()
s.evictStaleStorage()
case <-s.shutdownChan:
return
@@ -233,6 +272,71 @@ func (s *Server) runCleanupLoop() {
}
}
// checkMemoryPressure monitors memory usage and triggers aggressive eviction if needed
func (s *Server) checkMemoryPressure() {
var m runtime.MemStats
runtime.ReadMemStats(&m)
// Memory thresholds (configurable via env var)
const defaultThresholdMB = 500
thresholdMB := defaultThresholdMB
if env := os.Getenv("BEADS_DAEMON_MEMORY_THRESHOLD_MB"); env != "" {
var threshold int
if _, err := fmt.Sscanf(env, "%d", &threshold); err == nil && threshold > 0 {
thresholdMB = threshold
}
}
allocMB := m.Alloc / 1024 / 1024
if allocMB > uint64(thresholdMB) {
fmt.Fprintf(os.Stderr, "Warning: High memory usage detected (%d MB), triggering aggressive cache eviction\n", allocMB)
s.aggressiveEviction()
runtime.GC() // Suggest garbage collection
}
}
// aggressiveEviction evicts 50% of cached storage to reduce memory pressure
func (s *Server) aggressiveEviction() {
toClose := []storage.Storage{}
s.cacheMu.Lock()
if len(s.storageCache) == 0 {
s.cacheMu.Unlock()
return
}
// Build sorted list by last access
type cacheItem struct {
path string
entry *StorageCacheEntry
}
items := make([]cacheItem, 0, len(s.storageCache))
for path, entry := range s.storageCache {
items = append(items, cacheItem{path, entry})
}
sort.Slice(items, func(i, j int) bool {
return items[i].entry.lastAccess.Before(items[j].entry.lastAccess)
})
// Evict oldest 50%
numToEvict := len(items) / 2
for i := 0; i < numToEvict; i++ {
toClose = append(toClose, items[i].entry.store)
delete(s.storageCache, items[i].path)
}
s.cacheMu.Unlock()
// Close without holding lock
for _, store := range toClose {
if err := store.Close(); err != nil {
fmt.Fprintf(os.Stderr, "Warning: failed to close evicted storage: %v\n", err)
}
}
}
// evictStaleStorage removes idle connections and enforces cache size limits
func (s *Server) evictStaleStorage() {
now := time.Now()
@@ -291,6 +395,11 @@ func (s *Server) handleConnection(conn net.Conn) {
writer := bufio.NewWriter(conn)
for {
// Set read deadline for the next request
if err := conn.SetReadDeadline(time.Now().Add(s.requestTimeout)); err != nil {
return
}
line, err := reader.ReadBytes('\n')
if err != nil {
return
@@ -306,6 +415,11 @@ func (s *Server) handleConnection(conn net.Conn) {
continue
}
// Set write deadline for the response
if err := conn.SetWriteDeadline(time.Now().Add(s.requestTimeout)); err != nil {
return
}
resp := s.handleRequest(&req)
s.writeResponse(writer, resp)
}
@@ -488,6 +602,10 @@ func (s *Server) handlePing(_ *Request) Response {
func (s *Server) handleHealth(req *Request) Response {
start := time.Now()
// Get memory stats for health response
var m runtime.MemStats
runtime.ReadMemStats(&m)
store, err := s.getStorageForRequest(req)
if err != nil {
data, _ := json.Marshal(HealthResponse{
@@ -541,6 +659,9 @@ func (s *Server) handleHealth(req *Request) Response {
CacheHits: atomic.LoadInt64(&s.cacheHits),
CacheMisses: atomic.LoadInt64(&s.cacheMisses),
DBResponseTime: dbResponseMs,
ActiveConns: atomic.LoadInt32(&s.activeConns),
MaxConns: s.maxConns,
MemoryAllocMB: m.Alloc / 1024 / 1024,
}
if dbError != "" {