[client] Fix semaphore slot leaks (#5018)

- Remove WaitGroup, make SemaphoreGroup a pure semaphore
- Make Add() return error instead of silently failing on context cancel
- Remove context parameter from Done() to prevent slot leaks
- Fix missing Done() call in conn.go error path
This commit is contained in:
Zoltan Papp
2026-01-03 09:10:02 +01:00
committed by GitHub
parent 7ac65bf1ad
commit 9ba067391f
3 changed files with 89 additions and 77 deletions

View File

@@ -148,13 +148,15 @@ func NewConn(config ConnConfig, services ServiceDependencies) (*Conn, error) {
// It will try to establish a connection using ICE and in parallel with relay. The higher priority connection type will // It will try to establish a connection using ICE and in parallel with relay. The higher priority connection type will
// be used. // be used.
func (conn *Conn) Open(engineCtx context.Context) error { func (conn *Conn) Open(engineCtx context.Context) error {
conn.semaphore.Add(engineCtx) if err := conn.semaphore.Add(engineCtx); err != nil {
return err
}
conn.mu.Lock() conn.mu.Lock()
defer conn.mu.Unlock() defer conn.mu.Unlock()
if conn.opened { if conn.opened {
conn.semaphore.Done(engineCtx) conn.semaphore.Done()
return nil return nil
} }
@@ -165,6 +167,7 @@ func (conn *Conn) Open(engineCtx context.Context) error {
relayIsSupportedLocally := conn.workerRelay.RelayIsSupportedLocally() relayIsSupportedLocally := conn.workerRelay.RelayIsSupportedLocally()
workerICE, err := NewWorkerICE(conn.ctx, conn.Log, conn.config, conn, conn.signaler, conn.iFaceDiscover, conn.statusRecorder, relayIsSupportedLocally) workerICE, err := NewWorkerICE(conn.ctx, conn.Log, conn.config, conn, conn.signaler, conn.iFaceDiscover, conn.statusRecorder, relayIsSupportedLocally)
if err != nil { if err != nil {
conn.semaphore.Done()
return err return err
} }
conn.workerICE = workerICE conn.workerICE = workerICE
@@ -200,7 +203,7 @@ func (conn *Conn) Open(engineCtx context.Context) error {
defer conn.wg.Done() defer conn.wg.Done()
conn.waitInitialRandomSleepTime(conn.ctx) conn.waitInitialRandomSleepTime(conn.ctx)
conn.semaphore.Done(conn.ctx) conn.semaphore.Done()
conn.guard.Start(conn.ctx, conn.onGuardEvent) conn.guard.Start(conn.ctx, conn.onGuardEvent)
}() }()

View File

@@ -2,12 +2,10 @@ package semaphoregroup
import ( import (
"context" "context"
"sync"
) )
// SemaphoreGroup is a custom type that combines sync.WaitGroup and a semaphore. // SemaphoreGroup is a custom type that combines sync.WaitGroup and a semaphore.
type SemaphoreGroup struct { type SemaphoreGroup struct {
waitGroup sync.WaitGroup
semaphore chan struct{} semaphore chan struct{}
} }
@@ -18,31 +16,18 @@ func NewSemaphoreGroup(limit int) *SemaphoreGroup {
} }
} }
// Add increments the internal WaitGroup counter and acquires a semaphore slot. // Add acquire a slot
func (sg *SemaphoreGroup) Add(ctx context.Context) { func (sg *SemaphoreGroup) Add(ctx context.Context) error {
sg.waitGroup.Add(1)
// Acquire semaphore slot // Acquire semaphore slot
select { select {
case <-ctx.Done(): case <-ctx.Done():
return return ctx.Err()
case sg.semaphore <- struct{}{}: case sg.semaphore <- struct{}{}:
return nil
} }
} }
// Done decrements the internal WaitGroup counter and releases a semaphore slot. // Done releases a slot. Must be called after a successful Add.
func (sg *SemaphoreGroup) Done(ctx context.Context) { func (sg *SemaphoreGroup) Done() {
sg.waitGroup.Done() <-sg.semaphore
// Release semaphore slot
select {
case <-ctx.Done():
return
case <-sg.semaphore:
}
}
// Wait waits until the internal WaitGroup counter is zero.
func (sg *SemaphoreGroup) Wait() {
sg.waitGroup.Wait()
} }

View File

@@ -2,65 +2,89 @@ package semaphoregroup
import ( import (
"context" "context"
"sync"
"testing" "testing"
"time" "time"
) )
func TestSemaphoreGroup(t *testing.T) { func TestSemaphoreGroup(t *testing.T) {
semGroup := NewSemaphoreGroup(2)
for i := 0; i < 5; i++ {
semGroup.Add(context.Background())
go func(id int) {
defer semGroup.Done(context.Background())
got := len(semGroup.semaphore)
if got == 0 {
t.Errorf("Expected semaphore length > 0 , got 0")
}
time.Sleep(time.Millisecond)
t.Logf("Goroutine %d is running\n", id)
}(i)
}
semGroup.Wait()
want := 0
got := len(semGroup.semaphore)
if got != want {
t.Errorf("Expected semaphore length %d, got %d", want, got)
}
}
func TestSemaphoreGroupContext(t *testing.T) {
semGroup := NewSemaphoreGroup(1) semGroup := NewSemaphoreGroup(1)
semGroup.Add(context.Background()) _ = semGroup.Add(context.Background())
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
ctxTimeout, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond)
t.Cleanup(cancel) t.Cleanup(cancel)
rChan := make(chan struct{})
go func() { if err := semGroup.Add(ctxTimeout); err == nil {
semGroup.Add(ctx) t.Error("Adding to semaphore group should not block")
rChan <- struct{}{} }
}() }
select {
case <-rChan: func TestSemaphoreGroupFreeUp(t *testing.T) {
case <-time.NewTimer(2 * time.Second).C: semGroup := NewSemaphoreGroup(1)
t.Error("Adding to semaphore group should not block when context is not done") _ = semGroup.Add(context.Background())
} semGroup.Done()
semGroup.Done(context.Background()) ctxTimeout, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond)
t.Cleanup(cancel)
ctxDone, cancelDone := context.WithTimeout(context.Background(), 1*time.Second) if err := semGroup.Add(ctxTimeout); err != nil {
t.Cleanup(cancelDone) t.Error(err)
go func() { }
semGroup.Done(ctxDone) }
rChan <- struct{}{}
}() func TestSemaphoreGroupCanceledContext(t *testing.T) {
select { semGroup := NewSemaphoreGroup(1)
case <-rChan: _ = semGroup.Add(context.Background())
case <-time.NewTimer(2 * time.Second).C: ctx, cancel := context.WithCancel(context.Background())
t.Error("Releasing from semaphore group should not block when context is not done") cancel() // Cancel immediately
if err := semGroup.Add(ctx); err == nil {
t.Error("Add should return error when context is already canceled")
}
}
func TestSemaphoreGroupCancelWhileWaiting(t *testing.T) {
semGroup := NewSemaphoreGroup(1)
_ = semGroup.Add(context.Background())
ctx, cancel := context.WithCancel(context.Background())
errChan := make(chan error, 1)
go func() {
errChan <- semGroup.Add(ctx)
}()
time.Sleep(10 * time.Millisecond)
cancel()
if err := <-errChan; err == nil {
t.Error("Add should return error when context is canceled while waiting")
}
}
func TestSemaphoreGroupHighConcurrency(t *testing.T) {
const limit = 10
const numGoroutines = 100
semGroup := NewSemaphoreGroup(limit)
var wg sync.WaitGroup
for i := 0; i < numGoroutines; i++ {
wg.Add(1)
go func() {
defer wg.Done()
if err := semGroup.Add(context.Background()); err != nil {
t.Errorf("Unexpected error: %v", err)
return
}
time.Sleep(time.Millisecond)
semGroup.Done()
}()
}
wg.Wait()
// Verify all slots were released
if got := len(semGroup.semaphore); got != 0 {
t.Errorf("Expected semaphore to be empty, got %d slots occupied", got)
} }
} }