[client] Push status snapshot on every state.Set and classify SSO errors

Two related daemon-side status-stream fixes that together keep the UI's
status in sync with the daemon's contextState:

* state.Set previously only mutated the in-memory enum — transitions
  that weren't accompanied by a Mark{Management,Signal,...} call (e.g.
  StatusNeedsLogin after a PermissionDenied login, StatusLoginFailed
  after OAuth init failure, StatusIdle in the Login defer) left the
  UI stuck on the previous snapshot until an unrelated peer event
  happened to fire notifyStateChange. Add a callback on contextState
  fired from Set (outside the mutex, to avoid lock-order issues with
  the recorder's stateChangeMux), and wire it in Server.Start to the
  recorder's new public NotifyStateChange. Every state.Set callsite
  now pushes automatically; new ones don't need to opt in.

* WaitSSOLogin's WaitToken error branch lumped every failure into
  StatusLoginFailed, including context.Canceled aborts from a parallel
  profile switch (actCancel/waitCancel). That spurious LoginFailed
  then wedged the new profile's Up RPC with "up already in progress:
  current status LoginFailed". Split the branch by error type:
  context.Canceled lets the top-level defer pick StatusIdle,
  context.DeadlineExceeded sets StatusNeedsLogin (retryable; OAuth
  device-code window just expired), other errors keep LoginFailed
  (real auth/IO failures). Document the full state-transition table
  in the function godoc.
This commit is contained in:
Zoltan Papp
2026-05-14 14:51:51 +02:00
parent d33b841a33
commit d841a6aa07
3 changed files with 87 additions and 8 deletions

View File

@@ -1281,6 +1281,18 @@ func (d *Status) notifyStateChange() {
}
}
// NotifyStateChange is the public wake-the-subscribers entry point used by
// callers that mutate state outside the peer recorder — most importantly
// the connect-state machine, which writes StatusNeedsLogin into the
// shared contextState (client/internal/state.go) without touching any
// recorder field. Without this push the SubscribeStatus stream stays on
// the previous snapshot until an unrelated peer/management/signal
// change happens to fire notifyStateChange, leaving the UI's status
// out of sync with the daemon.
func (d *Status) NotifyStateChange() {
d.notifyStateChange()
}
func (d *Status) SetWgIface(wgInterface WGIfaceStatus) {
d.mux.Lock()
defer d.mux.Unlock()

View File

@@ -33,17 +33,34 @@ func CtxGetState(ctx context.Context) *contextState {
}
type contextState struct {
err error
status StatusType
mutex sync.Mutex
err error
status StatusType
mutex sync.Mutex
onChange func()
}
// SetOnChange installs a callback fired after every successful Set. Used by
// the daemon to wire the status recorder's notifyStateChange so any
// state.Set in the connect/login paths pushes a fresh snapshot to
// SubscribeStatus subscribers without each callsite having to opt in.
// The callback runs outside the contextState mutex to avoid a lock-order
// dependency with the recorder's stateChangeMux.
func (c *contextState) SetOnChange(fn func()) {
c.mutex.Lock()
c.onChange = fn
c.mutex.Unlock()
}
func (c *contextState) Set(update StatusType) {
c.mutex.Lock()
defer c.mutex.Unlock()
c.status = update
c.err = nil
cb := c.onChange
c.mutex.Unlock()
if cb != nil {
cb()
}
}
func (c *contextState) Status() (StatusType, error) {

View File

@@ -140,6 +140,15 @@ func (s *Server) Start() error {
}
state := internal.CtxGetState(s.rootCtx)
// Every contextState.Set in the connect/login/server paths must push a
// SubscribeStatus snapshot, otherwise transitions that don't happen to
// be accompanied by a Mark{Management,Signal,...} call (e.g. plain
// StatusNeedsLogin after a PermissionDenied login, StatusLoginFailed
// after OAuth init failure, StatusIdle in the Login defer) leave the
// UI stuck on the previous status until the next unrelated peer event.
// Binding the recorder here means new state.Set callsites don't have
// to opt in individually.
state.SetOnChange(s.statusRecorder.NotifyStateChange)
if err := handlePanicLog(); err != nil {
log.Warnf("failed to redirect stderr: %v", err)
@@ -571,8 +580,35 @@ func (s *Server) Login(callerCtx context.Context, msg *proto.LoginRequest) (*pro
return &proto.LoginResponse{}, nil
}
// WaitSSOLogin uses the userCode to validate the TokenInfo and
// waits for the user to continue with the login on a browser
// WaitSSOLogin validates the supplied userCode against the in-flight OAuth
// device/PKCE flow and blocks until the user finishes the browser leg.
//
// State transitions on exit:
//
// ┌──────────────────────────────────────────┬──────────────────────────────────┐
// │ Outcome │ contextState │
// ├──────────────────────────────────────────┼──────────────────────────────────┤
// │ Success → loginAttempt → Connected │ StatusConnected (loginAttempt) │
// │ Success → loginAttempt → still-NeedsLogin│ StatusNeedsLogin (loginAttempt) │
// │ Success → loginAttempt error │ StatusLoginFailed (loginAttempt) │
// │ UserCode mismatch │ StatusLoginFailed │
// │ WaitToken: context.Canceled (external │ defer runs: status untouched if │
// │ abort — profile switch invokes │ already NeedsLogin/LoginFailed,│
// │ actCancel/waitCancel, app quit, │ else StatusIdle. Keeps the │
// │ another WaitSSOLogin started) │ cancel from leaking as a │
// │ │ spurious LoginFailed on the │
// │ │ next profile's Up. │
// │ WaitToken: context.DeadlineExceeded │ StatusNeedsLogin │
// │ (OAuth device-code window expired │ (retryable; the UI's "Connect" │
// │ while waiting on the browser leg) │ re-enters the Login flow) │
// │ WaitToken: any other error │ StatusLoginFailed │
// │ (access_denied, expired_token, HTTP │ (genuine auth/IO failure; │
// │ failure, token validation rejection) │ surfaced verbatim to caller) │
// └──────────────────────────────────────────┴──────────────────────────────────┘
//
// The defer at the top of the function applies the Idle fallback so callers
// that bypass the explicit Set calls (the Canceled branch above, the success
// path before loginAttempt) still land on a sensible terminal status.
func (s *Server) WaitSSOLogin(callerCtx context.Context, msg *proto.WaitSSOLoginRequest) (*proto.WaitSSOLoginResponse, error) {
s.mutex.Lock()
if s.actCancel != nil {
@@ -632,7 +668,21 @@ func (s *Server) WaitSSOLogin(callerCtx context.Context, msg *proto.WaitSSOLogin
s.mutex.Lock()
s.oauthAuthFlow.expiresAt = time.Now()
s.mutex.Unlock()
state.Set(internal.StatusLoginFailed)
switch {
case errors.Is(err, context.Canceled):
// External abort (profile switch, app quit, another
// WaitSSOLogin started). Not a login failure — let the
// top-level defer fall through to StatusIdle so the next
// flow starts from a clean state.
case errors.Is(err, context.DeadlineExceeded):
// OAuth device-code window expired with no user action.
// Retryable — leave the daemon in NeedsLogin so the UI
// keeps the Login affordance instead of reading as a
// hard failure.
state.Set(internal.StatusNeedsLogin)
default:
state.Set(internal.StatusLoginFailed)
}
log.Errorf("waiting for browser login failed: %v", err)
return nil, err
}