[client] Fix tray flicker and stuck Connecting during management retry

The status snapshot tore down on every management retry because
state.Status() blanks the status when an error is wrapped, and the
SubscribeStatus stream propagated that as FailedPrecondition. The UI
treated any stream error as "daemon not running" and flickered the tray
to Not running between retries.

Disconnect was also unresponsive: Down set Idle before the retry
goroutine exited, which then overwrote it with Set(Connecting) on the
next attempt; the backoff sleep (up to 15s) wasn't context-aware, so the
goroutine kept running long after actCancel.

- buildStatusResponse falls back to the underlying status (via new
  state.CurrentStatus) instead of breaking the stream on wrapped errors.
- UI only flips to DaemonUnavailable on codes.Unavailable / non-status
  errors, so a live daemon returning FailedPrecondition is not reported
  as down.
- connect retry uses backoff.WithContext so actCancel interrupts the
  inter-attempt sleep, and skips Wrap(err) when the dial fails due to
  ctx cancellation.
- Down sets Idle after waiting for giveUpChan, so the retry goroutine
  can no longer race the disconnect.
- Tray hides Connect during Connecting and keeps Disconnect enabled so
  the user can abort an in-flight connection attempt.
This commit is contained in:
Zoltan Papp
2026-05-12 20:38:30 +02:00
parent 100d25a062
commit e3efaa5e59
5 changed files with 77 additions and 12 deletions

View File

@@ -258,6 +258,15 @@ func (c *ConnectClient) run(mobileDependency MobileDependency, runningChan chan
log.Debugf("connecting to the Management service %s", c.config.ManagementURL.Host)
mgmClient, err := mgm.NewClient(engineCtx, c.config.ManagementURL.Host, myPrivateKey, mgmTlsEnabled)
if err != nil {
// On daemon shutdown / Down() the parent context is cancelled
// and the dial fails with "context canceled". Wrapping that
// into state would leave the snapshot stuck at Connecting+err
// until the backoff loop wakes up — instead let the operation
// return cleanly so the deferred state.Set(StatusIdle) takes
// effect on the next iteration.
if c.ctx.Err() != nil {
return nil
}
return wrapErr(gstatus.Errorf(codes.FailedPrecondition, "failed connecting to Management Service : %s", err))
}
mgmNotifier := statusRecorderToMgmConnStateNotifier(c.statusRecorder)
@@ -426,7 +435,11 @@ func (c *ConnectClient) run(mobileDependency MobileDependency, runningChan chan
}
c.statusRecorder.ClientStart()
err = backoff.Retry(operation, backOff)
// Wrap the backoff with c.ctx so Down()/actCancel propagates into the
// inter-attempt sleep — otherwise a 15s MaxInterval can keep the retry
// loop alive long after the caller asked to give up, leaving the
// status stream stuck at Connecting.
err = backoff.Retry(operation, backoff.WithContext(backOff, c.ctx))
if err != nil {
log.Debugf("exiting client retry loop due to unrecoverable error: %s", err)
if s, ok := gstatus.FromError(err); ok && (s.Code() == codes.PermissionDenied) {

View File

@@ -57,6 +57,17 @@ func (c *contextState) Status() (StatusType, error) {
return c.status, nil
}
// CurrentStatus returns the last status set via Set, ignoring any wrapped
// error. Use when the status is needed for reporting purposes (e.g. the
// status snapshot stream) and a transient wrapped error from a retry loop
// shouldn't blank out the underlying status.
func (c *contextState) CurrentStatus() StatusType {
c.mutex.Lock()
defer c.mutex.Unlock()
return c.status
}
func (c *contextState) Wrap(err error) error {
c.mutex.Lock()
defer c.mutex.Unlock()