[client] Fix tray flicker and stuck Connecting during management retry

The status snapshot tore down on every management retry because state.Status() blanks the status when an error is wrapped, and the SubscribeStatus stream propagated that as FailedPrecondition. The UI treated any stream error as "daemon not running" and flickered the tray to Not running between retries. Disconnect was also unresponsive: Down set Idle before the retry goroutine exited, which then overwrote it with Set(Connecting) on the next attempt; the backoff sleep (up to 15s) wasn't context-aware, so the goroutine kept running long after actCancel. - buildStatusResponse falls back to the underlying status (via new state.CurrentStatus) instead of breaking the stream on wrapped errors. - UI only flips to DaemonUnavailable on codes.Unavailable / non-status errors, so a live daemon returning FailedPrecondition is not reported as down. - connect retry uses backoff.WithContext so actCancel interrupts the inter-attempt sleep, and skips Wrap(err) when the dial fails due to ctx cancellation. - Down sets Idle after waiting for giveUpChan, so the retry goroutine can no longer race the disconnect. - Tray hides Connect during Connecting and keeps Disconnect enabled so the user can abort an in-flight connection attempt.
2026-05-13 04:09:56 +00:00 · 2026-05-12 20:38:30 +02:00
parent 100d25a062
commit e3efaa5e59
5 changed files with 77 additions and 12 deletions
--- a/client/internal/connect.go
+++ b/client/internal/connect.go
@@ -258,6 +258,15 @@ func (c *ConnectClient) run(mobileDependency MobileDependency, runningChan chan
 		log.Debugf("connecting to the Management service %s", c.config.ManagementURL.Host)
 		mgmClient, err := mgm.NewClient(engineCtx, c.config.ManagementURL.Host, myPrivateKey, mgmTlsEnabled)
 		if err != nil {
+			// On daemon shutdown / Down() the parent context is cancelled
+			// and the dial fails with "context canceled". Wrapping that
+			// into state would leave the snapshot stuck at Connecting+err
+			// until the backoff loop wakes up — instead let the operation
+			// return cleanly so the deferred state.Set(StatusIdle) takes
+			// effect on the next iteration.
+			if c.ctx.Err() != nil {
+				return nil
+			}
 			return wrapErr(gstatus.Errorf(codes.FailedPrecondition, "failed connecting to Management Service : %s", err))
 		}
 		mgmNotifier := statusRecorderToMgmConnStateNotifier(c.statusRecorder)
@@ -426,7 +435,11 @@ func (c *ConnectClient) run(mobileDependency MobileDependency, runningChan chan
 	}

 	c.statusRecorder.ClientStart()
-	err = backoff.Retry(operation, backOff)
+	// Wrap the backoff with c.ctx so Down()/actCancel propagates into the
+	// inter-attempt sleep — otherwise a 15s MaxInterval can keep the retry
+	// loop alive long after the caller asked to give up, leaving the
+	// status stream stuck at Connecting.
+	err = backoff.Retry(operation, backoff.WithContext(backOff, c.ctx))
 	if err != nil {
 		log.Debugf("exiting client retry loop due to unrecoverable error: %s", err)
 		if s, ok := gstatus.FromError(err); ok && (s.Code() == codes.PermissionDenied) {
--- a/client/internal/state.go
+++ b/client/internal/state.go
@@ -57,6 +57,17 @@ func (c *contextState) Status() (StatusType, error) {
 	return c.status, nil
 }

+// CurrentStatus returns the last status set via Set, ignoring any wrapped
+// error. Use when the status is needed for reporting purposes (e.g. the
+// status snapshot stream) and a transient wrapped error from a retry loop
+// shouldn't blank out the underlying status.
+func (c *contextState) CurrentStatus() StatusType {
+	c.mutex.Lock()
+	defer c.mutex.Unlock()
+
+	return c.status
+}
+
 func (c *contextState) Wrap(err error) error {
 	c.mutex.Lock()
 	defer c.mutex.Unlock()