[client] Skip re-resolving cached management cache domains (#6518)

[management] Reschedule inactivity expiration when a peer disconnects (#6523)
Fixup debug config (#6514)
2026-06-24 16:59:55 +00:00 · 2026-06-23 17:55:57 +02:00 · 2026-06-23 17:44:32 +03:00 · 2026-06-22 22:01:49 +02:00 · 2026-06-22 19:54:38 +02:00 · 2026-06-22 19:42:04 +02:00
53 changed files with 2640 additions and 467 deletions
--- a/.github/workflows/check-license-dependencies.yml
+++ b/.github/workflows/check-license-dependencies.yml
@@ -20,7 +20,7 @@ jobs:

    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
        with:
          persist-credentials: false

@@ -59,12 +59,12 @@ jobs:
    runs-on: ubuntu-latest

    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
        with:
          persist-credentials: false

      - name: Set up Go
-        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
+        uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
        with:
          go-version-file: "go.mod"
          cache: true
--- a/.github/workflows/git-town.yml
+++ b/.github/workflows/git-town.yml
@@ -15,7 +15,7 @@ jobs:
      pull-requests: write

    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
        with:
          persist-credentials: false
      - uses: git-town/action@3d8b878379abb1ee393fb49865a28b4a6c2cd3b0 # v1.2.1
--- a/.github/workflows/golang-test-darwin.yml
+++ b/.github/workflows/golang-test-darwin.yml
@@ -16,12 +16,12 @@ jobs:
    runs-on: macos-latest
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
        with:
          persist-credentials: false

      - name: Install Go
-        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
+        uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
        with:
          go-version-file: "go.mod"
          cache: false
@@ -48,7 +48,7 @@ jobs:
        run: NETBIRD_STORE_ENGINE=${{ matrix.store }} CI=true go test -coverprofile=coverage.txt -tags=devcert -exec 'sudo --preserve-env=CI,NETBIRD_STORE_ENGINE' -timeout 5m -p 1 $(go list ./... | grep -v -e /management -e /signal -e /relay -e /proxy -e /combined)

      - name: Upload coverage reports to Codecov
-        uses: codecov/codecov-action@e79a6962e0d4c0c17b229090214935d2e33f8354 #v6.0.1
+        uses: codecov/codecov-action@fb8b3582c8e4def4969c97caa2f19720cb33a72f # v7.0.0
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          slug: netbirdio/netbird
--- a/.github/workflows/golang-test-freebsd.yml
+++ b/.github/workflows/golang-test-freebsd.yml
@@ -16,7 +16,7 @@ jobs:
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
        with:
          persist-credentials: false

@@ -28,7 +28,7 @@ jobs:
        id: test
        env:
          GO_VERSION: ${{ steps.goversion.outputs.version }}
-        uses: vmactions/freebsd-vm@d1e65811565151536c0c894fff74f06351ed26e6 # v1.4.5
+        uses: vmactions/freebsd-vm@b84ab5559b5a1bb4b8ee2737d2506a16e1737636 # v1.4.8
        with:
          usesh: true
          copyback: false
--- a/.github/workflows/golang-test-linux.yml
+++ b/.github/workflows/golang-test-linux.yml
@@ -18,7 +18,7 @@ jobs:
      management: ${{ steps.filter.outputs.management }}
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
        with:
          persist-credentials: false

@@ -30,7 +30,7 @@ jobs:
              - 'management/**'

      - name: Install Go
-        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
+        uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
        with:
          go-version-file: "go.mod"
          cache: false
@@ -119,12 +119,12 @@ jobs:
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
        with:
          persist-credentials: false

      - name: Install Go
-        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
+        uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
        with:
          go-version-file: "go.mod"
          cache: false
@@ -162,7 +162,7 @@ jobs:

      - name: Upload coverage reports to Codecov
        if: matrix.arch == 'amd64'
-        uses: codecov/codecov-action@e79a6962e0d4c0c17b229090214935d2e33f8354 #v6.0.1
+        uses: codecov/codecov-action@fb8b3582c8e4def4969c97caa2f19720cb33a72f # v7.0.0
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          slug: netbirdio/netbird
@@ -175,12 +175,12 @@ jobs:
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
        with:
          persist-credentials: false

      - name: Install Go
-        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
+        uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
        with:
          go-version-file: "go.mod"
          cache: false
@@ -246,12 +246,12 @@ jobs:
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
        with:
          persist-credentials: false

      - name: Install Go
-        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
+        uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
        with:
          go-version-file: "go.mod"
          cache: false
@@ -290,7 +290,7 @@ jobs:

      - name: Upload coverage reports to Codecov
        if: matrix.arch == 'amd64'
-        uses: codecov/codecov-action@e79a6962e0d4c0c17b229090214935d2e33f8354 #v6.0.1
+        uses: codecov/codecov-action@fb8b3582c8e4def4969c97caa2f19720cb33a72f # v7.0.0
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          slug: netbirdio/netbird
@@ -306,12 +306,12 @@ jobs:
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
        with:
          persist-credentials: false

      - name: Install Go
-        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
+        uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
        with:
          go-version-file: "go.mod"
          cache: false
@@ -347,7 +347,7 @@ jobs:

      - name: Upload coverage reports to Codecov
        if: matrix.arch == 'amd64'
-        uses: codecov/codecov-action@e79a6962e0d4c0c17b229090214935d2e33f8354 #v6.0.1
+        uses: codecov/codecov-action@fb8b3582c8e4def4969c97caa2f19720cb33a72f # v7.0.0
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          slug: netbirdio/netbird
@@ -363,12 +363,12 @@ jobs:
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
        with:
          persist-credentials: false

      - name: Install Go
-        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
+        uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
        with:
          go-version-file: "go.mod"
          cache: false
@@ -407,7 +407,7 @@ jobs:

      - name: Upload coverage reports to Codecov
        if: matrix.arch == 'amd64'
-        uses: codecov/codecov-action@e79a6962e0d4c0c17b229090214935d2e33f8354 #v6.0.1
+        uses: codecov/codecov-action@fb8b3582c8e4def4969c97caa2f19720cb33a72f # v7.0.0
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          slug: netbirdio/netbird
@@ -424,12 +424,12 @@ jobs:
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
        with:
          persist-credentials: false

      - name: Install Go
-        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
+        uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
        with:
          go-version-file: "go.mod"
          cache: false
@@ -484,7 +484,7 @@ jobs:

      - name: Upload coverage reports to Codecov
        if: matrix.arch == 'amd64'
-        uses: codecov/codecov-action@e79a6962e0d4c0c17b229090214935d2e33f8354 #v6.0.1
+        uses: codecov/codecov-action@fb8b3582c8e4def4969c97caa2f19720cb33a72f # v7.0.0
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          slug: netbirdio/netbird
@@ -529,12 +529,12 @@ jobs:
            prom/prometheus

      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
        with:
          persist-credentials: false

      - name: Install Go
-        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
+        uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
        with:
          go-version-file: "go.mod"
          cache: false
@@ -623,12 +623,12 @@ jobs:
            prom/prometheus

      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
        with:
          persist-credentials: false

      - name: Install Go
-        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
+        uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
        with:
          go-version-file: "go.mod"
          cache: false
@@ -692,12 +692,12 @@ jobs:
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
        with:
          persist-credentials: false

      - name: Install Go
-        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
+        uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
        with:
          go-version-file: "go.mod"
          cache: false
@@ -734,7 +734,7 @@ jobs:

      - name: Upload coverage reports to Codecov
        if: matrix.arch == 'amd64'
-        uses: codecov/codecov-action@e79a6962e0d4c0c17b229090214935d2e33f8354 #v6.0.1
+        uses: codecov/codecov-action@fb8b3582c8e4def4969c97caa2f19720cb33a72f # v7.0.0
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          slug: netbirdio/netbird
--- a/.github/workflows/golang-test-windows.yml
+++ b/.github/workflows/golang-test-windows.yml
@@ -18,12 +18,12 @@ jobs:
    runs-on: windows-latest
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
        with:
          persist-credentials: false

      - name: Install Go
-        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
+        uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
        id: go
        with:
          go-version-file: "go.mod"
--- a/.github/workflows/golangci-lint.yml
+++ b/.github/workflows/golangci-lint.yml
@@ -15,7 +15,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
        with:
          persist-credentials: false
      - name: codespell
@@ -40,7 +40,7 @@ jobs:
    timeout-minutes: 15
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
        with:
          persist-credentials: false
      - name: Check for duplicate constants
@@ -48,7 +48,7 @@ jobs:
        run: |
          ! awk '/const \(/,/)/{print $0}' management/server/activity/codes.go | grep -o '= [0-9]*' | sort | uniq -d | grep .
      - name: Install Go
-        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
+        uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
        with:
          go-version-file: "go.mod"
          cache: false
--- a/.github/workflows/install-script-test.yml
+++ b/.github/workflows/install-script-test.yml
@@ -22,7 +22,7 @@ jobs:
    runs-on: ${{ matrix.os }}
    steps:
      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
        with:
          persist-credentials: false

--- a/.github/workflows/mobile-build-validation.yml
+++ b/.github/workflows/mobile-build-validation.yml
@@ -16,11 +16,11 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
        with:
          persist-credentials: false
      - name: Install Go
-        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
+        uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
        with:
          go-version-file: "go.mod"
      - name: Setup Android SDK
@@ -28,7 +28,7 @@ jobs:
        with:
          cmdline-tools-version: 8512546
      - name: Setup Java
-        uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654
+        uses: actions/setup-java@ad2b38190b15e4d6bdf0c97fb4fca8412226d287
        with:
          java-version: "11"
          distribution: "adopt"
@@ -54,11 +54,11 @@ jobs:
    runs-on: macos-latest
    steps:
      - name: Checkout repository
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
        with:
          persist-credentials: false
      - name: Install Go
-        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
+        uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
        with:
          go-version-file: "go.mod"
      - name: install gomobile
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -27,7 +27,7 @@ jobs:
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
        with:
          persist-credentials: false

@@ -64,7 +64,7 @@ jobs:
        if: steps.check_diff.outputs.diff_exists == 'true'
        env:
          GO_VERSION: ${{ steps.goversion.outputs.version }}
-        uses: vmactions/freebsd-vm@d1e65811565151536c0c894fff74f06351ed26e6 # v1.4.5
+        uses: vmactions/freebsd-vm@b84ab5559b5a1bb4b8ee2737d2506a16e1737636 # v1.4.8
        with:
          usesh: true
          copyback: false
@@ -135,7 +135,7 @@ jobs:
      ghcr_images: ${{ steps.tag_and_push_images.outputs.images_markdown }}
    steps:
      - name: Checkout
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
        with:
          fetch-depth: 0 # It is required for GoReleaser to work properly
          persist-credentials: false
@@ -166,7 +166,7 @@ jobs:
          fi

      - name: Set up Go
-        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
+        uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
        with:
          go-version-file: "go.mod"
          cache: false
@@ -186,9 +186,9 @@ jobs:
      - name: check git status
        run: git --no-pager diff --exit-code
      - name: Set up QEMU
-        uses: docker/setup-qemu-action@ce360397dd3f832beb865e1373c09c0e9f86d70a #v4.0.0
+        uses: docker/setup-qemu-action@06116385d9baf250c9f4dcb4858b16962ea869c3 # v4.1.0
      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd #v4.0.0
+        uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5 # v4.1.0
      - name: Login to Docker hub
        if: github.event_name != 'pull_request'
        uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0
@@ -221,7 +221,7 @@ jobs:
        run: goversioninfo -arm -64 -icon client/ui/assets/netbird.ico -manifest client/manifest.xml -product-name ${{ env.PRODUCT_NAME }} -copyright "${{ env.COPYRIGHT }}" -ver-major ${{ steps.semver_parser.outputs.major }} -ver-minor ${{ steps.semver_parser.outputs.minor }} -ver-patch ${{ steps.semver_parser.outputs.patch }} -ver-build 0 -file-version ${{ steps.semver_parser.outputs.fullversion }}.0 -product-version ${{ steps.semver_parser.outputs.fullversion }}.0 -o client/resources_windows_arm64.syso
      - name: Run GoReleaser
        id: goreleaser
-        uses: goreleaser/goreleaser-action@4c6ab561adb47e50c45ef534e2155934e91c40c1 # v7.2.0
+        uses: goreleaser/goreleaser-action@5daf1e915a5f0af01ddbcd89a43b8061ff4f1a89 # v7.2.2
        with:
          version: ${{ env.GORELEASER_VER }}
          args: release --clean ${{ env.flags }}
@@ -347,7 +347,7 @@ jobs:
      release_ui_artifact_url: ${{ steps.upload_release_ui.outputs.artifact-url }}
    steps:
      - name: Checkout
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
        with:
          fetch-depth: 0 # It is required for GoReleaser to work properly
          persist-credentials: false
@@ -374,7 +374,7 @@ jobs:
          fi

      - name: Set up Go
-        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
+        uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
        with:
          go-version-file: "go.mod"
          cache: false
@@ -420,7 +420,7 @@ jobs:
        run: goversioninfo -arm -64 -icon client/ui/assets/netbird.ico -manifest client/ui/manifest.xml -product-name ${{ env.PRODUCT_NAME }}-"UI" -copyright "${{ env.COPYRIGHT }}" -ver-major ${{ steps.semver_parser.outputs.major }} -ver-minor ${{ steps.semver_parser.outputs.minor }} -ver-patch ${{ steps.semver_parser.outputs.patch }} -ver-build 0 -file-version ${{ steps.semver_parser.outputs.fullversion }}.0 -product-version ${{ steps.semver_parser.outputs.fullversion }}.0 -o client/ui/resources_windows_arm64.syso

      - name: Run GoReleaser
-        uses: goreleaser/goreleaser-action@4c6ab561adb47e50c45ef534e2155934e91c40c1 # v7.2.0
+        uses: goreleaser/goreleaser-action@5daf1e915a5f0af01ddbcd89a43b8061ff4f1a89 # v7.2.2
        with:
          version: ${{ env.GORELEASER_VER }}
          args: release --config .goreleaser_ui.yaml --clean ${{ env.flags }}
@@ -464,12 +464,12 @@ jobs:
      - if: ${{ !startsWith(github.ref, 'refs/tags/v') }}
        run: echo "flags=--snapshot" >> $GITHUB_ENV
      - name: Checkout
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
        with:
          fetch-depth: 0 # It is required for GoReleaser to work properly
          persist-credentials: false
      - name: Set up Go
-        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
+        uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
        with:
          go-version-file: "go.mod"
          cache: false
@@ -488,7 +488,7 @@ jobs:
        run: git --no-pager diff --exit-code
      - name: Run GoReleaser
        id: goreleaser
-        uses: goreleaser/goreleaser-action@4c6ab561adb47e50c45ef534e2155934e91c40c1 # v7.2.0
+        uses: goreleaser/goreleaser-action@5daf1e915a5f0af01ddbcd89a43b8061ff4f1a89 # v7.2.2
        with:
          version: ${{ env.GORELEASER_VER }}
          args: release --config .goreleaser_ui_darwin.yaml --clean ${{ env.flags }}
@@ -522,7 +522,7 @@ jobs:
      downloadPath: '${{ github.workspace }}\temp'
    steps:
      - name: Checkout
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
        with:
          persist-credentials: false

@@ -534,13 +534,13 @@ jobs:
        run: echo "C:\Program Files\7-Zip" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append

      - name: Download release artifacts
-        uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.1
+        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
        with:
          name: release
          path: release

      - name: Download UI release artifacts
-        uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.1
+        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
        with:
          name: release-ui
          path: release-ui
--- a/.github/workflows/test-infrastructure-files.yml
+++ b/.github/workflows/test-infrastructure-files.yml
@@ -68,12 +68,12 @@ jobs:
        run: sudo apt-get install -y curl

      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
        with:
          persist-credentials: false

      - name: Install Go
-        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
+        uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
        with:
          go-version-file: "go.mod"

@@ -207,7 +207,7 @@ jobs:
      - name: Build management docker image
        working-directory: management
        run: |
-          docker build -t netbirdio/management:latest .
+          docker build -t netbirdio/management:latest --build-arg TARGETPLATFORM=. .

      - name: Build signal binary
        working-directory: signal
@@ -216,7 +216,7 @@ jobs:
      - name: Build signal docker image
        working-directory: signal
        run: |
-          docker build -t netbirdio/signal:latest .
+          docker build -t netbirdio/signal:latest --build-arg TARGETPLATFORM=. .

      - name: Build relay binary
        working-directory: relay
@@ -225,7 +225,7 @@ jobs:
      - name: Build relay docker image
        working-directory: relay
        run: |
-          docker build -t netbirdio/relay:latest .
+          docker build -t netbirdio/relay:latest --build-arg TARGETPLATFORM=. .

      - name: run docker compose up
        working-directory: infrastructure_files/artifacts
@@ -256,7 +256,7 @@ jobs:
        run: sudo apt-get install -y jq

      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
        with:
          persist-credentials: false

--- a/.github/workflows/wasm-build-validation.yml
+++ b/.github/workflows/wasm-build-validation.yml
@@ -19,11 +19,11 @@ jobs:
      GOARCH: wasm
    steps:
      - name: Checkout repository
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
        with:
          persist-credentials: false
      - name: Install Go
-        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
+        uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
        with:
          go-version-file: "go.mod"
      - name: Install dependencies
@@ -44,11 +44,11 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
        with:
          persist-credentials: false
      - name: Install Go
-        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
+        uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
        with:
          go-version-file: "go.mod"
      - name: Build Wasm client
--- a/.goreleaser.yaml
+++ b/.goreleaser.yaml
@@ -462,9 +462,13 @@ checksum:
    - glob: ./infrastructure_files/getting-started-with-zitadel.sh
    - glob: ./release_files/install.sh
    - glob: ./infrastructure_files/getting-started.sh
+    - glob: ./infrastructure_files/getting-started-enterprise.sh
+    - glob: ./infrastructure_files/migrate-to-enterprise.sh

 release:
  extra_files:
    - glob: ./infrastructure_files/getting-started-with-zitadel.sh
    - glob: ./release_files/install.sh
    - glob: ./infrastructure_files/getting-started.sh
+    - glob: ./infrastructure_files/getting-started-enterprise.sh
+    - glob: ./infrastructure_files/migrate-to-enterprise.sh
--- a/client/cmd/debug.go
+++ b/client/cmd/debug.go
@@ -130,7 +130,7 @@ func debugConfigDump(cmd *cobra.Command, _ []string) error {

 	client := proto.NewDaemonServiceClient(conn)
 	resp, err := client.GetConfig(cmd.Context(), &proto.GetConfigRequest{
-		ProfileName: activeProf.Name,
+		ProfileName: string(activeProf.ID),
 		Username:    currUser.Username,
 	})
 	if err != nil {
--- a/client/embed/embed.go
+++ b/client/embed/embed.go
@@ -279,9 +279,11 @@ func (c *Client) Start(startCtx context.Context) error {

 	select {
 	case <-startCtx.Done():
-		// Cancel the client context before stopping: Engine.Start blocks on the
-		// signal stream while holding the engine mutex and only unblocks on
-		// cancellation. Stopping first would deadlock on that mutex.
+		// ConnectClient.Stop now cancels its own run context and waits for the
+		// run loop to tear the engine down, so this cancel() is no longer
+		// required to break the deadlock and could be removed. It is kept as a
+		// belt-and-suspenders measure: cancelling the parent context first
+		// guarantees the run loop is unblocked even if Stop's contract regresses.
 		cancel()
 		if stopErr := client.Stop(); stopErr != nil {
 			return fmt.Errorf("stop error after context done. Stop error: %w. Context done: %w", stopErr, startCtx.Err())
--- a/client/internal/connect.go
+++ b/client/internal/connect.go
@@ -11,6 +11,7 @@ import (
 	"runtime/debug"
 	"strings"
 	"sync"
+	"sync/atomic"
 	"time"

 	"github.com/cenkalti/backoff/v4"
@@ -54,6 +55,10 @@ var androidRunOverride func(c *ConnectClient, runningChan chan struct{}, logPath

 type ConnectClient struct {
 	ctx            context.Context
+	runCancel      context.CancelFunc
+	runExited      chan struct{}
+	runOnce        sync.Once
+	runStarted     atomic.Bool
 	config         *profilemanager.Config
 	statusRecorder *peer.Status

@@ -70,8 +75,14 @@ func NewConnectClient(
 	config *profilemanager.Config,
 	statusRecorder *peer.Status,
 ) *ConnectClient {
+	// Derive the run context here so Stop owns the cancel that unblocks the run
+	// loop. runCancel is set once at construction, so Stop can call it without
+	// racing the run loop's startup. Callers therefore need not cancel before Stop.
+	runCtx, runCancel := context.WithCancel(ctx)
 	return &ConnectClient{
-		ctx:            ctx,
+		ctx:            runCtx,
+		runCancel:      runCancel,
+		runExited:      make(chan struct{}),
 		config:         config,
 		statusRecorder: statusRecorder,
 		engineMutex:    sync.Mutex{},
@@ -135,6 +146,11 @@ func (c *ConnectClient) RunOniOS(
 }

 func (c *ConnectClient) run(mobileDependency MobileDependency, runningChan chan struct{}, logPath string) error {
+	// Mark the loop as started and signal exit on return so Stop can wait for
+	// the loop to finish (and skip the wait if the loop never ran).
+	c.runStarted.Store(true)
+	defer c.runOnce.Do(func() { close(c.runExited) })
+
 	defer func() {
 		if r := recover(); r != nil {
 			rec := c.statusRecorder
@@ -290,7 +306,7 @@ func (c *ConnectClient) run(mobileDependency MobileDependency, runningChan chan
 			log.Debug(err)
 			if s, ok := gstatus.FromError(err); ok && (s.Code() == codes.PermissionDenied) {
 				state.Set(StatusNeedsLogin)
-				_ = c.Stop()
+				c.runCancel()
 				return backoff.Permanent(wrapErr(err)) // unrecoverable error
 			}
 			return wrapErr(err)
@@ -410,14 +426,10 @@ func (c *ConnectClient) run(mobileDependency MobileDependency, runningChan chan
 		c.engine = nil
 		c.engineMutex.Unlock()

-		// todo: consider to remove this condition. Is not thread safe.
-		// We should always call Stop(), but we need to verify that it is idempotent
-		if engine.wgInterface != nil {
-			log.Infof("ensuring %s is removed, Netbird engine context cancelled", engine.wgInterface.Name())
+		log.Infof("ensuring wg interface is removed, Netbird engine context cancelled")

-			if err := engine.Stop(); err != nil {
-				log.Errorf("Failed to stop engine: %v", err)
-			}
+		if err := engine.Stop(); err != nil {
+			log.Errorf("Failed to stop engine: %v", err)
 		}
 		c.statusRecorder.ClientTeardown()

@@ -433,12 +445,12 @@ func (c *ConnectClient) run(mobileDependency MobileDependency, runningChan chan
 	}

 	c.statusRecorder.ClientStart()
-	err = backoff.Retry(operation, backOff)
+	err = backoff.Retry(operation, backoff.WithContext(backOff, c.ctx))
 	if err != nil {
 		log.Debugf("exiting client retry loop due to unrecoverable error: %s", err)
 		if s, ok := gstatus.FromError(err); ok && (s.Code() == codes.PermissionDenied) {
 			state.Set(StatusNeedsLogin)
-			_ = c.Stop()
+			c.runCancel()
 		}
 		return err
 	}
@@ -516,11 +528,9 @@ func (c *ConnectClient) Status() StatusType {
 }

 func (c *ConnectClient) Stop() error {
-	engine := c.Engine()
-	if engine != nil {
-		if err := engine.Stop(); err != nil {
-			return fmt.Errorf("stop engine: %w", err)
-		}
+	c.runCancel()
+	if c.runStarted.Load() {
+		<-c.runExited
 	}
 	return nil
 }
--- a/client/internal/dns/mgmt/mgmt.go
+++ b/client/internal/dns/mgmt/mgmt.go
@@ -51,13 +51,20 @@ type cachedRecord struct {
 }

 // Resolver caches critical NetBird infrastructure domains.
-// records, refreshing, mgmtDomain and serverDomains are all guarded by mutex.
+// records, refreshing, failedResolves, mgmtDomain and serverDomains are all
+// guarded by mutex.
 type Resolver struct {
 	records       map[dns.Question]*cachedRecord
 	mgmtDomain    *domain.Domain
 	serverDomains *dnsconfig.ServerDomains
 	mutex         sync.RWMutex

+	// failedResolves records the last failed initial resolve per domain so a
+	// domain that never resolves isn't retried on every server-domains update
+	// until refreshBackoff elapses. Entries are cleared on success and pruned
+	// to the current server-domains set.
+	failedResolves map[domain.Domain]time.Time
+
 	chain            ChainResolver
 	chainMaxPriority int
 	refreshGroup     singleflight.Group
@@ -76,9 +83,10 @@ type Resolver struct {
 // NewResolver creates a new management domains cache resolver.
 func NewResolver() *Resolver {
 	return &Resolver{
-		records:    make(map[dns.Question]*cachedRecord),
-		refreshing: make(map[dns.Question]*atomic.Bool),
-		cacheTTL:   resolveCacheTTL(),
+		records:        make(map[dns.Question]*cachedRecord),
+		refreshing:     make(map[dns.Question]*atomic.Bool),
+		failedResolves: make(map[domain.Domain]time.Time),
+		cacheTTL:       resolveCacheTTL(),
 	}
 }

@@ -173,7 +181,9 @@ func (m *Resolver) continueToNext(w dns.ResponseWriter, r *dns.Msg) {

 // AddDomain resolves a domain and stores its A/AAAA records in the cache.
 // A family that resolves NODATA (nil err, zero records) evicts any stale
-// entry for that qtype.
+// entry for that qtype. When one family hard-errors while the other succeeds,
+// the resolved family is still cached but AddDomain returns an error so the
+// caller retries the incomplete resolve rather than treating it as complete.
 func (m *Resolver) AddDomain(ctx context.Context, d domain.Domain) error {
 	dnsName := strings.ToLower(dns.Fqdn(d.PunycodeString()))

@@ -203,6 +213,10 @@ func (m *Resolver) AddDomain(ctx context.Context, d domain.Domain) error {
 	log.Debugf("added/updated domain=%s with %d A records and %d AAAA records",
 		d.SafeString(), len(aRecords), len(aaaaRecords))

+	if errA != nil || errAAAA != nil {
+		return fmt.Errorf("resolve %s: incomplete, a family failed: %w", d.SafeString(), errors.Join(errA, errAAAA))
+	}
+
 	return nil
 }

@@ -462,6 +476,7 @@ func (m *Resolver) RemoveDomain(d domain.Domain) error {
 	delete(m.records, qAAAA)
 	delete(m.refreshing, qA)
 	delete(m.refreshing, qAAAA)
+	delete(m.failedResolves, d)

 	log.Debugf("removed domain=%s from cache", d.SafeString())
 	return nil
@@ -505,6 +520,7 @@ func (m *Resolver) UpdateFromServerDomains(ctx context.Context, serverDomains dn
 		allDomains := m.extractDomainsFromServerDomains(updatedServerDomains)
 		currentDomains := m.GetCachedDomains()
 		removedDomains = m.removeStaleDomains(currentDomains, allDomains)
+		m.pruneFailedResolves(allDomains)
 	}

 	m.addNewDomains(ctx, newDomains)
@@ -577,13 +593,85 @@ func (m *Resolver) isManagementDomain(domain domain.Domain) bool {
 	return m.mgmtDomain != nil && domain == *m.mgmtDomain
 }

-// addNewDomains resolves and caches all domains from the update
+// addNewDomains resolves and caches domains that are not yet in the cache,
+// running the lookups concurrently. Domains already cached are skipped and left
+// to the stale-while-revalidate refresh path, so a sync never re-resolves them
+// synchronously: once NetBird owns the OS resolver the resolve runs through the
+// handler chain and would otherwise dial the managed upstreams under the engine
+// sync lock on every update.
 func (m *Resolver) addNewDomains(ctx context.Context, newDomains domain.List) {
+	var wg sync.WaitGroup
+	seen := make(map[domain.Domain]struct{}, len(newDomains))
 	for _, newDomain := range newDomains {
-		if err := m.AddDomain(ctx, newDomain); err != nil {
-			log.Warnf("failed to add/update domain=%s: %v", newDomain.SafeString(), err)
-		} else {
-			log.Debugf("added/updated management cache domain=%s", newDomain.SafeString())
+		if _, dup := seen[newDomain]; dup {
+			continue
+		}
+		seen[newDomain] = struct{}{}
+
+		if !m.needsResolve(newDomain) {
+			continue
+		}
+
+		wg.Add(1)
+		go func(d domain.Domain) {
+			defer wg.Done()
+			if err := m.AddDomain(ctx, d); err != nil {
+				m.markResolveFailed(d)
+				log.Warnf("failed to add/update domain=%s: %v", d.SafeString(), err)
+				return
+			}
+			m.clearResolveFailed(d)
+			log.Debugf("added/updated management cache domain=%s", d.SafeString())
+		}(newDomain)
+	}
+	wg.Wait()
+}
+
+// needsResolve decides whether d is due for a synchronous resolve. While a
+// failure marker exists for d, the answer depends only on the backoff: the
+// domain is due once refreshBackoff has passed since the recorded failure,
+// whether or not a record is cached. Without a marker, d is due only when
+// neither its A nor its AAAA question has a cache entry; anything already
+// cached is left to the stale-while-revalidate refresh path.
+func (m *Resolver) needsResolve(d domain.Domain) bool {
+	fqdn := strings.ToLower(dns.Fqdn(d.PunycodeString()))
+	qA := dns.Question{Name: fqdn, Qtype: dns.TypeA, Qclass: dns.ClassINET}
+	qAAAA := dns.Question{Name: fqdn, Qtype: dns.TypeAAAA, Qclass: dns.ClassINET}
+
+	m.mutex.RLock()
+	defer m.mutex.RUnlock()
+
+	// A failure marker takes precedence over whatever is cached.
+	failedAt, failed := m.failedResolves[d]
+	if failed {
+		return time.Since(failedAt) >= refreshBackoff
+	}
+
+	_, hasA := m.records[qA]
+	_, hasAAAA := m.records[qAAAA]
+	return !hasA && !hasAAAA
+}
+
+// markResolveFailed records the current time as the moment d last failed to
+// resolve, replacing any earlier marker for the same domain.
+func (m *Resolver) markResolveFailed(d domain.Domain) {
+	m.mutex.Lock()
+	defer m.mutex.Unlock()
+
+	m.failedResolves[d] = time.Now()
+}
+
+// clearResolveFailed removes the failure marker stored for d, if there is one.
+func (m *Resolver) clearResolveFailed(d domain.Domain) {
+	m.mutex.Lock()
+	defer m.mutex.Unlock()
+
+	delete(m.failedResolves, d)
+}
+
+// pruneFailedResolves deletes every failure marker whose domain is absent from
+// domains, so the marker map never outgrows the current server-domains set.
+// RemoveDomain cannot do this on its own: a domain that only ever failed has no
+// cached record, so RemoveDomain never sees it.
+func (m *Resolver) pruneFailedResolves(domains domain.List) {
+	m.mutex.Lock()
+	defer m.mutex.Unlock()
+
+	for marked := range m.failedResolves {
+		if listed := slices.Contains(domains, marked); !listed {
+			delete(m.failedResolves, marked)
 		}
 	}
 }
--- a/client/internal/dns/mgmt/mgmt_refresh_test.go
+++ b/client/internal/dns/mgmt/mgmt_refresh_test.go
@@ -21,6 +21,7 @@ type fakeChain struct {
 	mu       sync.Mutex
 	calls    map[string]int
 	answers  map[string][]dns.RR
+	qErr     map[string]error
 	err      error
 	hasRoot  bool
 	onLookup func()
@@ -30,6 +31,7 @@ func newFakeChain() *fakeChain {
 	return &fakeChain{
 		calls:   map[string]int{},
 		answers: map[string][]dns.RR{},
+		qErr:    map[string]error{},
 		hasRoot: true,
 	}
 }
@@ -47,6 +49,9 @@ func (f *fakeChain) ResolveInternal(ctx context.Context, msg *dns.Msg, maxPriori
 	f.calls[key]++
 	answers := f.answers[key]
 	err := f.err
+	if err == nil {
+		err = f.qErr[key]
+	}
 	onLookup := f.onLookup
 	f.mu.Unlock()

@@ -75,6 +80,12 @@ func (f *fakeChain) setAnswer(name string, qtype uint16, ip string) {
 	}
 }

+// setErr registers err for the given name and query type in qErr. The entry
+// is keyed as name, a "|" separator, and the type mnemonic taken from
+// dns.TypeToString.
+func (f *fakeChain) setErr(name string, qtype uint16, err error) {
+	key := name + "|" + dns.TypeToString[qtype]
+
+	f.mu.Lock()
+	f.qErr[key] = err
+	f.mu.Unlock()
+}
+
 func (f *fakeChain) callCount(name string, qtype uint16) int {
 	f.mu.Lock()
 	defer f.mu.Unlock()
--- a/client/internal/dns/mgmt/mgmt_resolve_test.go
+++ b/client/internal/dns/mgmt/mgmt_resolve_test.go
@@ -0,0 +1,183 @@
+package mgmt
+
+import (
+	"context"
+	"errors"
+	"sync/atomic"
+	"testing"
+	"time"
+
+	"github.com/miekg/dns"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	dnsconfig "github.com/netbirdio/netbird/client/internal/dns/config"
+	"github.com/netbirdio/netbird/shared/management/domain"
+)
+
+// A domain already in the cache must not be re-resolved on a subsequent server
+// domains update; it is left to the stale-while-revalidate refresh path.
+func TestResolver_UpdateFromServerDomains_SkipsCached(t *testing.T) {
+	r := NewResolver()
+	chain := newFakeChain()
+	chain.setAnswer("signal.example.com.", dns.TypeA, "10.0.0.2")
+	r.SetChainResolver(chain, 50)
+
+	sd := dnsconfig.ServerDomains{Signal: domain.Domain("signal.example.com")}
+
+	_, err := r.UpdateFromServerDomains(context.Background(), sd)
+	require.NoError(t, err)
+	require.Equal(t, 1, chain.callCount("signal.example.com.", dns.TypeA),
+		"first update must resolve the domain")
+
+	_, err = r.UpdateFromServerDomains(context.Background(), sd)
+	require.NoError(t, err)
+	assert.Equal(t, 1, chain.callCount("signal.example.com.", dns.TypeA),
+		"cached domain must not be re-resolved on a subsequent update")
+}
+
+// TestResolver_AddNewDomains_ResolvesConcurrently checks that the new domains
+// of a single update are resolved in parallel rather than one after another.
+// Every fake lookup blocks for 50ms while the test tracks how many lookups are
+// in flight at the same time.
+func TestResolver_AddNewDomains_ResolvesConcurrently(t *testing.T) {
+	r := NewResolver()
+	chain := newFakeChain()
+
+	var inflight, maxInflight atomic.Int32
+	chain.onLookup = func() {
+		n := inflight.Add(1)
+		// Raise the high-water mark to n unless another lookup has already
+		// recorded a value at least as large.
+		for {
+			old := maxInflight.Load()
+			if n <= old || maxInflight.CompareAndSwap(old, n) {
+				break
+			}
+		}
+		time.Sleep(50 * time.Millisecond)
+		inflight.Add(-1)
+	}
+
+	relays := []domain.Domain{"a.example.com", "b.example.com", "c.example.com", "d.example.com"}
+	for _, d := range relays {
+		chain.setAnswer(dns.Fqdn(string(d)), dns.TypeA, "10.0.0.2")
+	}
+	r.SetChainResolver(chain, 50)
+
+	start := time.Now()
+	_, err := r.UpdateFromServerDomains(context.Background(), dnsconfig.ServerDomains{Relay: relays})
+	require.NoError(t, err)
+	elapsed := time.Since(start)
+
+	// AddDomain resolves both the A and the AAAA family, so one domain alone
+	// may account for up to two in-flight lookups if the families are queried
+	// in parallel (NOTE(review): confirm against AddDomain). Three or more can
+	// only be reached when at least two domains overlap.
+	assert.GreaterOrEqual(t, int(maxInflight.Load()), 3, "domains must resolve concurrently")
+	// Loose wall-clock sanity bound only: strictly serial resolution with one
+	// 50ms lookup per domain would still finish in about 200ms, below this
+	// limit, so the in-flight high-water mark above is the actual proof.
+	assert.Less(t, elapsed, 300*time.Millisecond, "resolution should not be serial")
+}
+
+// A domain that fails to resolve must not be retried on every update; the
+// failure backoff suppresses re-resolution until it expires.
+func TestResolver_UpdateFromServerDomains_BacksOffFailures(t *testing.T) {
+	r := NewResolver()
+	chain := newFakeChain()
+	chain.err = errors.New("resolve boom")
+	r.SetChainResolver(chain, 50)
+
+	sd := dnsconfig.ServerDomains{Signal: domain.Domain("signal.example.com")}
+
+	_, err := r.UpdateFromServerDomains(context.Background(), sd)
+	require.NoError(t, err)
+	require.Equal(t, 1, chain.callCount("signal.example.com.", dns.TypeA),
+		"first update must attempt the resolve")
+
+	_, err = r.UpdateFromServerDomains(context.Background(), sd)
+	require.NoError(t, err)
+	assert.Equal(t, 1, chain.callCount("signal.example.com.", dns.TypeA),
+		"failed resolve must back off and not retry on the next update")
+}
+
+// A domain listed under more than one server-domain type (e.g. STUN and TURN on
+// the same host) must be resolved once per update, not once per occurrence.
+func TestResolver_AddNewDomains_DedupesDuplicateDomains(t *testing.T) {
+	r := NewResolver()
+	chain := newFakeChain()
+	chain.setAnswer("dup.example.com.", dns.TypeA, "10.0.0.9")
+	r.SetChainResolver(chain, 50)
+
+	sd := dnsconfig.ServerDomains{
+		Stuns: []domain.Domain{"dup.example.com"},
+		Turns: []domain.Domain{"dup.example.com"},
+	}
+
+	_, err := r.UpdateFromServerDomains(context.Background(), sd)
+	require.NoError(t, err)
+	assert.Equal(t, 1, chain.callCount("dup.example.com.", dns.TypeA),
+		"a domain appearing under multiple server-domain types must resolve once")
+}
+
+// A failure marker must be dropped once its domain leaves the server-domains set
+// so the map stays bounded to the current set.
+func TestResolver_UpdateFromServerDomains_PrunesFailedResolves(t *testing.T) {
+	r := NewResolver()
+	chain := newFakeChain()
+	chain.err = errors.New("resolve boom")
+	r.SetChainResolver(chain, 50)
+
+	_, err := r.UpdateFromServerDomains(context.Background(), dnsconfig.ServerDomains{Signal: domain.Domain("gone.example.com")})
+	require.NoError(t, err)
+	r.mutex.RLock()
+	_, marked := r.failedResolves[domain.Domain("gone.example.com")]
+	r.mutex.RUnlock()
+	require.True(t, marked, "failed resolve must be recorded")
+
+	_, err = r.UpdateFromServerDomains(context.Background(), dnsconfig.ServerDomains{Signal: domain.Domain("other.example.com")})
+	require.NoError(t, err)
+	r.mutex.RLock()
+	_, stillMarked := r.failedResolves[domain.Domain("gone.example.com")]
+	r.mutex.RUnlock()
+	assert.False(t, stillMarked, "failure marker for a domain no longer in the set must be pruned")
+}
+
+// When one family hard-errors while the other resolves, the domain is cached
+// for the working family but recorded as incomplete so the failed family is
+// retried under backoff instead of being treated as fully resolved forever.
+func TestResolver_AddNewDomains_RetriesPartialFamilyFailure(t *testing.T) {
+	d := domain.Domain("relay.example.com")
+	r := NewResolver()
+	chain := newFakeChain()
+	chain.setAnswer("relay.example.com.", dns.TypeA, "10.0.0.2")
+	chain.setErr("relay.example.com.", dns.TypeAAAA, errors.New("servfail"))
+	r.SetChainResolver(chain, 50)
+
+	_, err := r.UpdateFromServerDomains(context.Background(), dnsconfig.ServerDomains{Relay: []domain.Domain{d}})
+	require.NoError(t, err)
+
+	r.mutex.RLock()
+	_, aCached := r.records[dns.Question{Name: "relay.example.com.", Qtype: dns.TypeA, Qclass: dns.ClassINET}]
+	_, marked := r.failedResolves[d]
+	r.mutex.RUnlock()
+	require.True(t, aCached, "the working family must still be cached")
+	require.True(t, marked, "a partial failure must be recorded so the failed family is retried")
+
+	assert.False(t, r.needsResolve(d), "within the backoff window the domain is not retried")
+
+	r.mutex.Lock()
+	r.failedResolves[d] = time.Now().Add(-2 * refreshBackoff)
+	r.mutex.Unlock()
+	assert.True(t, r.needsResolve(d), "after the backoff elapses the domain is retried to pick up the missing family")
+}
+
+// TestResolver_AddNewDomains_NodataIsNotFailure resolves a host that only has
+// an A answer configured. The absent AAAA family (NODATA) must not leave a
+// failure marker, and needsResolve must report false afterwards so the host is
+// not re-resolved on every sync.
+func TestResolver_AddNewDomains_NodataIsNotFailure(t *testing.T) {
+	v4only := domain.Domain("v4only.example.com")
+
+	upstream := newFakeChain()
+	upstream.setAnswer("v4only.example.com.", dns.TypeA, "10.0.0.2")
+
+	resolver := NewResolver()
+	resolver.SetChainResolver(upstream, 50)
+
+	update := dnsconfig.ServerDomains{Relay: []domain.Domain{v4only}}
+	_, err := resolver.UpdateFromServerDomains(context.Background(), update)
+	require.NoError(t, err)
+
+	resolver.mutex.RLock()
+	_, marked := resolver.failedResolves[v4only]
+	resolver.mutex.RUnlock()
+
+	assert.False(t, marked, "a NODATA family must not be recorded as a failure")
+	assert.False(t, resolver.needsResolve(v4only), "an IPv4-only host must not be re-resolved on later syncs")
+}
--- a/client/internal/dns/resutil/resolve.go
+++ b/client/internal/dns/resutil/resolve.go
@@ -207,3 +207,35 @@ func FormatAnswers(answers []dns.RR) string {
 	}
 	return "[" + strings.Join(parts, ", ") + "]"
 }
+
+// StripOPT drops every OPT pseudo-RR from msg.Extra and keeps the remaining
+// records in their original order. Per RFC 6891 a responder must not include
+// an OPT RR toward a client that did not advertise EDNS0. The filtering is done
+// in place, reusing the backing array of msg.Extra.
+func StripOPT(msg *dns.Msg) {
+	if len(msg.Extra) == 0 {
+		return
+	}
+
+	kept := 0
+	for _, rr := range msg.Extra {
+		if _, isOPT := rr.(*dns.OPT); !isOPT {
+			// kept never runs ahead of the read position, so this
+			// write cannot clobber a record that is still to be read.
+			msg.Extra[kept] = rr
+			kept++
+		}
+	}
+	msg.Extra = msg.Extra[:kept]
+}
+
+// ExtractEDE looks for an Extended DNS Error (RFC 8914) option in the OPT
+// record of msg. It returns the first such option and true, or nil and false
+// when msg carries no OPT record or the record holds no EDE option.
+func ExtractEDE(msg *dns.Msg) (*dns.EDNS0_EDE, bool) {
+	if opt := msg.IsEdns0(); opt != nil {
+		for _, option := range opt.Option {
+			ede, isEDE := option.(*dns.EDNS0_EDE)
+			if isEDE {
+				return ede, true
+			}
+		}
+	}
+	return nil, false
+}
--- a/client/internal/dns/resutil/resolve_test.go
+++ b/client/internal/dns/resutil/resolve_test.go
@@ -120,3 +120,42 @@ func TestLookupIP_DNSErrorNotIsNotFound(t *testing.T) {

 	assert.Equal(t, dns.RcodeServerFailure, result.Rcode, "upstream failure should map to SERVFAIL")
 }
+
+// TestStripOPT feeds StripOPT an Extra section holding one OPT and one A
+// record and expects only the A record to survive.
+func TestStripOPT(t *testing.T) {
+	optRR := &dns.OPT{Hdr: dns.RR_Header{Name: ".", Rrtype: dns.TypeOPT}}
+	aRR := &dns.A{Hdr: dns.RR_Header{Name: "x.", Rrtype: dns.TypeA}, A: net.IPv4(1, 2, 3, 4)}
+	msg := &dns.Msg{Extra: []dns.RR{optRR, aRR}}
+
+	StripOPT(msg)
+
+	assert.Len(t, msg.Extra, 1, "OPT should be removed, A kept")
+	_, stillOPT := msg.Extra[0].(*dns.OPT)
+	assert.False(t, stillOPT, "remaining record must not be OPT")
+}
+
+func TestExtractEDE(t *testing.T) {
+	t.Run("no edns", func(t *testing.T) {
+		_, ok := ExtractEDE(&dns.Msg{})
+		assert.False(t, ok, "message without OPT has no EDE")
+	})
+
+	t.Run("edns without ede", func(t *testing.T) {
+		rm := &dns.Msg{}
+		rm.SetEdns0(4096, false)
+		_, ok := ExtractEDE(rm)
+		assert.False(t, ok, "OPT without EDE option returns false")
+	})
+
+	t.Run("with ede", func(t *testing.T) {
+		rm := &dns.Msg{}
+		opt := &dns.OPT{Hdr: dns.RR_Header{Name: ".", Rrtype: dns.TypeOPT}}
+		opt.Option = append(opt.Option, &dns.EDNS0_EDE{InfoCode: 49152, ExtraText: "upstream timeout"})
+		rm.Extra = append(rm.Extra, opt)
+
+		ede, ok := ExtractEDE(rm)
+		assert.True(t, ok, "EDE option should be found")
+		assert.Equal(t, uint16(49152), ede.InfoCode)
+		assert.Equal(t, "upstream timeout", ede.ExtraText)
+	})
+}
--- a/client/internal/dns/server.go
+++ b/client/internal/dns/server.go
@@ -6,6 +6,7 @@ import (
 	"fmt"
 	"net/netip"
 	"net/url"
+	"os"
 	"slices"
 	"strings"
 	"sync"
@@ -38,11 +39,15 @@ const (
 	// defaultWarningDelayBase is the starting grace window before a
 	// "Nameserver group unreachable" event fires for a group that's
 	// never been healthy and only has overlay upstreams with no
-	// Connected peer. Per-server and overridable; see warningDelayFor.
-	defaultWarningDelayBase = 30 * time.Second
+	// Connected peer. Per-server and overridable via envWarningDelay;
+	// see warningDelay.
+	defaultWarningDelayBase = 60 * time.Second
 	// warningDelayBonusCap caps the route-count bonus added to the
-	// base grace window. See warningDelayFor.
+	// base grace window. See warningDelay.
 	warningDelayBonusCap = 30 * time.Second
+	// envWarningDelay overrides defaultWarningDelayBase with a Go duration
+	// string (e.g. "90s", "2m"). Invalid or non-positive values are ignored.
+	envWarningDelay = "NB_DNS_HEALTH_WARNING_DELAY"
 )

 // errNoUsableNameservers signals that a merged-domain group has no usable
@@ -135,7 +140,7 @@ type DefaultServer struct {
 	disableSys         bool
 	mux                sync.Mutex
 	service            service
-	dnsMuxMap          registeredHandlerMap
+	dnsMuxHandlers     []handlerWrapper
 	localResolver      *local.Resolver
 	wgInterface        WGIface
 	hostManager        hostManager
@@ -199,8 +204,6 @@ type handlerWrapper struct {
 	priority int
 }

-type registeredHandlerMap map[types.HandlerID]handlerWrapper
-
 // DefaultServerConfig holds configuration parameters for NewDefaultServer
 type DefaultServerConfig struct {
 	WgInterface    WGIface
@@ -289,7 +292,6 @@ func newDefaultServer(
 		service:           dnsService,
 		handlerChain:      handlerChain,
 		extraDomains:      make(map[domain.Domain]int),
-		dnsMuxMap:         make(registeredHandlerMap),
 		localResolver:     local.NewResolver(),
 		wgInterface:       wgInterface,
 		statusRecorder:    statusRecorder,
@@ -298,7 +300,7 @@ func newDefaultServer(
 		hostManager:       &noopHostConfigurator{},
 		mgmtCacheResolver: mgmtCacheResolver,
 		currentConfigHash: ^uint64(0), // Initialize to max uint64 to ensure first config is always applied
-		warningDelayBase:  defaultWarningDelayBase,
+		warningDelayBase:  warningDelayBaseFromEnv(),
 		healthRefresh:     make(chan struct{}, 1),
 	}
 	// Wire the local resolver against the peer status recorder so it can
@@ -328,7 +330,7 @@ func (s *DefaultServer) SetRouteSources(selected, active func() route.HAMap) {
 	type routeSettable interface {
 		setSelectedRoutes(func() route.HAMap)
 	}
-	for _, entry := range s.dnsMuxMap {
+	for _, entry := range s.dnsMuxHandlers {
 		if h, ok := entry.handler.(routeSettable); ok {
 			h.setSelectedRoutes(selected)
 		}
@@ -978,19 +980,23 @@ func (s *DefaultServer) usableNameServers(nameServers []nbdns.NameServer) []neti

 func (s *DefaultServer) updateMux(muxUpdates []handlerWrapper) {
 	// this will introduce a short period of time when the server is not able to handle DNS requests
-	for _, existing := range s.dnsMuxMap {
+	for _, existing := range s.dnsMuxHandlers {
 		s.deregisterHandler([]string{existing.domain}, existing.priority)
-		existing.handler.Stop()
+		// The local resolver is a persistent singleton shared by every custom
+		// zone and reused across config updates. Its chain registrations are
+		// per-config and must be deregistered, but Stop() cancels its lookup
+		// context (breaking external CNAME-target resolution) and clears its
+		// records, so it must not be torn down here.
+		if existing.handler != s.localResolver {
+			existing.handler.Stop()
+		}
 	}

-	muxUpdateMap := make(registeredHandlerMap)
-
 	for _, update := range muxUpdates {
 		s.registerHandler([]string{update.domain}, update.handler, update.priority)
-		muxUpdateMap[update.handler.ID()] = update
 	}

-	s.dnsMuxMap = muxUpdateMap
+	s.dnsMuxHandlers = muxUpdates
 }

 // updateNSGroupStates records the new group set and pokes the refresher.
@@ -1154,6 +1160,26 @@ func (s *DefaultServer) projectUnhealthy(p *nsGroupProj, servers []netip.AddrPor
 	return false
 }

+// warningDelayBaseFromEnv reads envWarningDelay and returns it as the base
+// grace window when it parses as a positive Go duration. An unset or empty
+// variable yields defaultWarningDelayBase silently; an unparsable or
+// non-positive value yields defaultWarningDelayBase after logging a warning.
+func warningDelayBaseFromEnv() time.Duration {
+	raw := os.Getenv(envWarningDelay)
+	if raw == "" {
+		return defaultWarningDelayBase
+	}
+
+	parsed, err := time.ParseDuration(raw)
+	switch {
+	case err != nil:
+		log.Warnf("invalid %s value %q, using default %v: %v", envWarningDelay, raw, defaultWarningDelayBase, err)
+		return defaultWarningDelayBase
+	case parsed <= 0:
+		log.Warnf("%s must be positive, got %v, using default %v", envWarningDelay, parsed, defaultWarningDelayBase)
+		return defaultWarningDelayBase
+	default:
+		return parsed
+	}
+}
+
 // warningDelay returns the grace window for the given selected-route
 // count. Scales gently: +1s per 100 routes, capped by
 // warningDelayBonusCap. Parallel handshakes mean handshake time grows
@@ -1204,7 +1230,7 @@ func (s *DefaultServer) groupHasImmediateUpstream(servers []netip.AddrPort, snap
 // in more than one handler.
 func (s *DefaultServer) collectUpstreamHealth() map[netip.AddrPort]UpstreamHealth {
 	merged := make(map[netip.AddrPort]UpstreamHealth)
-	for _, entry := range s.dnsMuxMap {
+	for _, entry := range s.dnsMuxHandlers {
 		reporter, ok := entry.handler.(upstreamHealthReporter)
 		if !ok {
 			continue
--- a/client/internal/dns/server_test.go
+++ b/client/internal/dns/server_test.go
@@ -104,19 +104,6 @@ func init() {
 	formatter.SetTextFormatter(log.StandardLogger())
 }

-func generateDummyHandler(d string, servers []nbdns.NameServer) *upstreamResolverBase {
-	var srvs []netip.AddrPort
-	for _, srv := range servers {
-		srvs = append(srvs, srv.AddrPort())
-	}
-	u := &upstreamResolverBase{
-		domain: domain.Domain(d),
-		cancel: func() {},
-	}
-	u.addRace(srvs)
-	return u
-}
-
 func TestUpdateDNSServer(t *testing.T) {

 	nameServers := []nbdns.NameServer{
@@ -132,22 +119,20 @@ func TestUpdateDNSServer(t *testing.T) {
 		},
 	}

-	dummyHandler := local.NewResolver()
-
 	testCases := []struct {
 		name                string
-		initUpstreamMap     registeredHandlerMap
+		initUpstreamMap     []handlerWrapper
 		initLocalZones      []nbdns.CustomZone
 		initSerial          uint64
 		inputSerial         uint64
 		inputUpdate         nbdns.Config
 		shouldFail          bool
-		expectedUpstreamMap registeredHandlerMap
+		expectedUpstreamMap []handlerWrapper
 		expectedLocalQs     []dns.Question
 	}{
 		{
 			name:            "Initial Config Should Succeed",
-			initUpstreamMap: make(registeredHandlerMap),
+			initUpstreamMap: nil,
 			initSerial:      0,
 			inputSerial:     1,
 			inputUpdate: nbdns.Config{
@@ -169,20 +154,17 @@ func TestUpdateDNSServer(t *testing.T) {
 					},
 				},
 			},
-			expectedUpstreamMap: registeredHandlerMap{
-				generateDummyHandler("netbird.io", nameServers).ID(): handlerWrapper{
+			expectedUpstreamMap: []handlerWrapper{
+				{
 					domain:   "netbird.io",
-					handler:  dummyHandler,
 					priority: PriorityUpstream,
 				},
-				dummyHandler.ID(): handlerWrapper{
+				{
 					domain:   "netbird.cloud",
-					handler:  dummyHandler,
 					priority: PriorityLocal,
 				},
-				generateDummyHandler(".", nameServers).ID(): handlerWrapper{
+				{
 					domain:   nbdns.RootZone,
-					handler:  dummyHandler,
 					priority: PriorityDefault,
 				},
 			},
@@ -191,10 +173,10 @@ func TestUpdateDNSServer(t *testing.T) {
 		{
 			name:           "New Config Should Succeed",
 			initLocalZones: []nbdns.CustomZone{{Domain: "netbird.cloud", Records: []nbdns.SimpleRecord{{Name: "netbird.cloud", Type: 1, Class: nbdns.DefaultClass, TTL: 300, RData: "10.0.0.1"}}}},
-			initUpstreamMap: registeredHandlerMap{
-				generateDummyHandler(zoneRecords[0].Name, nameServers).ID(): handlerWrapper{
+			initUpstreamMap: []handlerWrapper{
+				{
 					domain:   "netbird.cloud",
-					handler:  dummyHandler,
+					handler:  &mockHandler{},
 					priority: PriorityUpstream,
 				},
 			},
@@ -215,15 +197,13 @@ func TestUpdateDNSServer(t *testing.T) {
 					},
 				},
 			},
-			expectedUpstreamMap: registeredHandlerMap{
-				generateDummyHandler("netbird.io", nameServers).ID(): handlerWrapper{
+			expectedUpstreamMap: []handlerWrapper{
+				{
 					domain:   "netbird.io",
-					handler:  dummyHandler,
 					priority: PriorityUpstream,
 				},
-				"local-resolver": handlerWrapper{
+				{
 					domain:   "netbird.cloud",
-					handler:  dummyHandler,
 					priority: PriorityLocal,
 				},
 			},
@@ -232,7 +212,7 @@ func TestUpdateDNSServer(t *testing.T) {
 		{
 			name:            "Smaller Config Serial Should Be Skipped",
 			initLocalZones:  []nbdns.CustomZone{},
-			initUpstreamMap: make(registeredHandlerMap),
+			initUpstreamMap: nil,
 			initSerial:      2,
 			inputSerial:     1,
 			shouldFail:      true,
@@ -240,7 +220,7 @@ func TestUpdateDNSServer(t *testing.T) {
 		{
 			name:            "Empty NS Group Domain Or Not Primary Element Should Fail",
 			initLocalZones:  []nbdns.CustomZone{},
-			initUpstreamMap: make(registeredHandlerMap),
+			initUpstreamMap: nil,
 			initSerial:      0,
 			inputSerial:     1,
 			inputUpdate: nbdns.Config{
@@ -262,7 +242,7 @@ func TestUpdateDNSServer(t *testing.T) {
 		{
 			name:            "Invalid NS Group Nameservers list Should Fail",
 			initLocalZones:  []nbdns.CustomZone{},
-			initUpstreamMap: make(registeredHandlerMap),
+			initUpstreamMap: nil,
 			initSerial:      0,
 			inputSerial:     1,
 			inputUpdate: nbdns.Config{
@@ -284,7 +264,7 @@ func TestUpdateDNSServer(t *testing.T) {
 		{
 			name:            "Invalid Custom Zone Records list Should Skip",
 			initLocalZones:  []nbdns.CustomZone{},
-			initUpstreamMap: make(registeredHandlerMap),
+			initUpstreamMap: nil,
 			initSerial:      0,
 			inputSerial:     1,
 			inputUpdate: nbdns.Config{
@@ -301,42 +281,41 @@ func TestUpdateDNSServer(t *testing.T) {
 					},
 				},
 			},
-			expectedUpstreamMap: registeredHandlerMap{generateDummyHandler(".", nameServers).ID(): handlerWrapper{
+			expectedUpstreamMap: []handlerWrapper{{
 				domain:   ".",
-				handler:  dummyHandler,
 				priority: PriorityDefault,
 			}},
 		},
 		{
 			name:           "Empty Config Should Succeed and Clean Maps",
 			initLocalZones: []nbdns.CustomZone{{Domain: "netbird.cloud", Records: []nbdns.SimpleRecord{{Name: "netbird.cloud", Type: int(dns.TypeA), Class: nbdns.DefaultClass, TTL: 300, RData: "10.0.0.1"}}}},
-			initUpstreamMap: registeredHandlerMap{
-				generateDummyHandler(zoneRecords[0].Name, nameServers).ID(): handlerWrapper{
+			initUpstreamMap: []handlerWrapper{
+				{
 					domain:   zoneRecords[0].Name,
-					handler:  dummyHandler,
+					handler:  &mockHandler{},
 					priority: PriorityUpstream,
 				},
 			},
 			initSerial:          0,
 			inputSerial:         1,
 			inputUpdate:         nbdns.Config{ServiceEnable: true},
-			expectedUpstreamMap: make(registeredHandlerMap),
+			expectedUpstreamMap: nil,
 			expectedLocalQs:     []dns.Question{},
 		},
 		{
 			name:           "Disabled Service Should clean map",
 			initLocalZones: []nbdns.CustomZone{{Domain: "netbird.cloud", Records: []nbdns.SimpleRecord{{Name: "netbird.cloud", Type: int(dns.TypeA), Class: nbdns.DefaultClass, TTL: 300, RData: "10.0.0.1"}}}},
-			initUpstreamMap: registeredHandlerMap{
-				generateDummyHandler(zoneRecords[0].Name, nameServers).ID(): handlerWrapper{
+			initUpstreamMap: []handlerWrapper{
+				{
 					domain:   zoneRecords[0].Name,
-					handler:  dummyHandler,
+					handler:  &mockHandler{},
 					priority: PriorityUpstream,
 				},
 			},
 			initSerial:          0,
 			inputSerial:         1,
 			inputUpdate:         nbdns.Config{ServiceEnable: false},
-			expectedUpstreamMap: make(registeredHandlerMap),
+			expectedUpstreamMap: nil,
 			expectedLocalQs:     []dns.Question{},
 		},
 	}
@@ -393,7 +372,7 @@ func TestUpdateDNSServer(t *testing.T) {
 				}
 			}()

-			dnsServer.dnsMuxMap = testCase.initUpstreamMap
+			dnsServer.dnsMuxHandlers = testCase.initUpstreamMap
 			dnsServer.localResolver.Update(testCase.initLocalZones)
 			dnsServer.updateSerial = testCase.initSerial

@@ -405,14 +384,20 @@ func TestUpdateDNSServer(t *testing.T) {
 				t.Fatalf("update dns server should not fail, got error: %v", err)
 			}

-			if len(dnsServer.dnsMuxMap) != len(testCase.expectedUpstreamMap) {
-				t.Fatalf("update upstream failed, map size is different than expected, want %d, got %d", len(testCase.expectedUpstreamMap), len(dnsServer.dnsMuxMap))
+			if len(dnsServer.dnsMuxHandlers) != len(testCase.expectedUpstreamMap) {
+				t.Fatalf("update upstream failed, map size is different than expected, want %d, got %d", len(testCase.expectedUpstreamMap), len(dnsServer.dnsMuxHandlers))
 			}

-			for key := range testCase.expectedUpstreamMap {
-				_, found := dnsServer.dnsMuxMap[key]
+			for _, expected := range testCase.expectedUpstreamMap {
+				found := false
+				for _, got := range dnsServer.dnsMuxHandlers {
+					if got.domain == expected.domain && got.priority == expected.priority {
+						found = true
+						break
+					}
+				}
 				if !found {
-					t.Fatalf("update upstream failed, key %s was not found in the dnsMuxMap: %#v", key, dnsServer.dnsMuxMap)
+					t.Fatalf("update upstream failed, handler for domain=%s priority=%d not found in dnsMuxHandlers: %#v", expected.domain, expected.priority, dnsServer.dnsMuxHandlers)
 				}
 			}

@@ -512,8 +497,8 @@ func TestDNSFakeResolverHandleUpdates(t *testing.T) {
 		}
 	}()

-	dnsServer.dnsMuxMap = registeredHandlerMap{
-		"id1": handlerWrapper{
+	dnsServer.dnsMuxHandlers = []handlerWrapper{
+		{
 			domain:   zoneRecords[0].Name,
 			handler:  &local.Resolver{},
 			priority: PriorityUpstream,
@@ -1029,15 +1014,15 @@ func (m *mockService) RegisterMux(string, dns.Handler) {}
 func (m *mockService) DeregisterMux(string)            {}

 func TestDefaultServer_UpdateMux(t *testing.T) {
-	baseMatchHandlers := registeredHandlerMap{
-		"upstream-group1": {
+	baseMatchHandlers := []handlerWrapper{
+		{
 			domain: "example.com",
 			handler: &mockHandler{
 				Id: "upstream-group1",
 			},
 			priority: PriorityUpstream,
 		},
-		"upstream-group2": {
+		{
 			domain: "example.com",
 			handler: &mockHandler{
 				Id: "upstream-group2",
@@ -1046,15 +1031,15 @@ func TestDefaultServer_UpdateMux(t *testing.T) {
 		},
 	}

-	baseRootHandlers := registeredHandlerMap{
-		"upstream-root1": {
+	baseRootHandlers := []handlerWrapper{
+		{
 			domain: ".",
 			handler: &mockHandler{
 				Id: "upstream-root1",
 			},
 			priority: PriorityDefault,
 		},
-		"upstream-root2": {
+		{
 			domain: ".",
 			handler: &mockHandler{
 				Id: "upstream-root2",
@@ -1063,22 +1048,22 @@ func TestDefaultServer_UpdateMux(t *testing.T) {
 		},
 	}

-	baseMixedHandlers := registeredHandlerMap{
-		"upstream-group1": {
+	baseMixedHandlers := []handlerWrapper{
+		{
 			domain: "example.com",
 			handler: &mockHandler{
 				Id: "upstream-group1",
 			},
 			priority: PriorityUpstream,
 		},
-		"upstream-group2": {
+		{
 			domain: "example.com",
 			handler: &mockHandler{
 				Id: "upstream-group2",
 			},
 			priority: PriorityUpstream - 1,
 		},
-		"upstream-other": {
+		{
 			domain: "other.com",
 			handler: &mockHandler{
 				Id: "upstream-other",
@@ -1089,7 +1074,7 @@ func TestDefaultServer_UpdateMux(t *testing.T) {

 	tests := []struct {
 		name             string
-		initialHandlers  registeredHandlerMap
+		initialHandlers  []handlerWrapper
 		updates          []handlerWrapper
 		expectedHandlers map[string]string // map[HandlerID]domain
 		description      string
@@ -1373,32 +1358,38 @@ func TestDefaultServer_UpdateMux(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			server := &DefaultServer{
-				dnsMuxMap:    tt.initialHandlers,
-				handlerChain: NewHandlerChain(),
-				service:      &mockService{},
+				dnsMuxHandlers: tt.initialHandlers,
+				handlerChain:   NewHandlerChain(),
+				service:        &mockService{},
 			}

 			// Perform the update
 			server.updateMux(tt.updates)

 			// Verify the results
-			assert.Equal(t, len(tt.expectedHandlers), len(server.dnsMuxMap),
+			assert.Equal(t, len(tt.expectedHandlers), len(server.dnsMuxHandlers),
 				"Number of handlers after update doesn't match expected")

 			// Check each expected handler
 			for id, expectedDomain := range tt.expectedHandlers {
-				handler, exists := server.dnsMuxMap[types.HandlerID(id)]
-				assert.True(t, exists, "Expected handler %s not found", id)
-				if exists {
-					assert.Equal(t, expectedDomain, handler.domain,
+				var found *handlerWrapper
+				for i := range server.dnsMuxHandlers {
+					if server.dnsMuxHandlers[i].handler.ID() == types.HandlerID(id) {
+						found = &server.dnsMuxHandlers[i]
+						break
+					}
+				}
+				assert.NotNil(t, found, "Expected handler %s not found", id)
+				if found != nil {
+					assert.Equal(t, expectedDomain, found.domain,
 						"Domain mismatch for handler %s", id)
 				}
 			}

 			// Verify no unexpected handlers exist
-			for HandlerID := range server.dnsMuxMap {
-				_, expected := tt.expectedHandlers[string(HandlerID)]
-				assert.True(t, expected, "Unexpected handler found: %s", HandlerID)
+			for _, entry := range server.dnsMuxHandlers {
+				_, expected := tt.expectedHandlers[string(entry.handler.ID())]
+				assert.True(t, expected, "Unexpected handler found: %s", entry.handler.ID())
 			}

 			// Verify the handlerChain state and order
@@ -1413,7 +1404,7 @@ func TestDefaultServer_UpdateMux(t *testing.T) {

 				// Verify handler exists in mux
 				foundInMux := false
-				for _, muxEntry := range server.dnsMuxMap {
+				for _, muxEntry := range server.dnsMuxHandlers {
 					if chainEntry.Handler == muxEntry.handler &&
 						chainEntry.Priority == muxEntry.priority &&
 						chainEntry.Pattern == dns.Fqdn(muxEntry.domain) {
@@ -1422,12 +1413,108 @@ func TestDefaultServer_UpdateMux(t *testing.T) {
 					}
 				}
 				assert.True(t, foundInMux,
-					"Handler in chain not found in dnsMuxMap")
+					"Handler in chain not found in dnsMuxHandlers")
 			}
 		})
 	}
 }

+// chainHasPattern tells whether the server's handler chain contains an entry
+// whose OrigPattern and Priority both equal the given pattern and priority.
+func chainHasPattern(s *DefaultServer, pattern string, priority int) bool {
+	for _, entry := range s.handlerChain.handlers {
+		if entry.Priority == priority && entry.OrigPattern == pattern {
+			return true
+		}
+	}
+	return false
+}
+
+// TestDefaultServer_UpdateMux_SharedHandlerZoneRemoval checks that updateMux
+// keeps a separate registration per (handler, domain) pair when a single
+// handler serves several zones. All custom zones share one handler instance
+// (the local resolver, whose ID is the constant "local-resolver"), so dropping
+// a zone must deregister only that zone's chain entry and keep the rest.
+// Keying registrations by handler ID alone would fold every zone into one
+// entry and leave removed zones in the chain, answering authoritatively with
+// no records.
+func TestDefaultServer_UpdateMux_SharedHandlerZoneRemoval(t *testing.T) {
+	// A single handler backs every custom zone, just like s.localResolver.
+	shared := &mockHandler{Id: "local-resolver"}
+	userZone := handlerWrapper{domain: "userzone.test", handler: shared, priority: PriorityLocal}
+	peerZone := handlerWrapper{domain: "peerzone.test", handler: shared, priority: PriorityLocal}
+
+	server := &DefaultServer{
+		handlerChain: NewHandlerChain(),
+		service:      &mockService{},
+	}
+
+	// Register both zones under the shared handler. The zone that survives is
+	// listed last, mirroring the management emission order.
+	server.updateMux([]handlerWrapper{userZone, peerZone})
+
+	require.True(t, chainHasPattern(server, "userzone.test.", PriorityLocal),
+		"userzone.test should be registered after the first update")
+	require.True(t, chainHasPattern(server, "peerzone.test.", PriorityLocal),
+		"peerzone.test should be registered after the first update")
+
+	// Second update carries only peerzone.test, i.e. userzone.test is removed.
+	server.updateMux([]handlerWrapper{peerZone})
+
+	stillHasPeer := chainHasPattern(server, "peerzone.test.", PriorityLocal)
+	stillHasUser := chainHasPattern(server, "userzone.test.", PriorityLocal)
+
+	assert.True(t, stillHasPeer,
+		"peerzone.test should remain after removing userzone.test")
+	assert.False(t, stillHasUser,
+		"userzone.test handler must be deregistered, not leaked in the chain")
+}
+
+// TestDefaultServer_UpdateMux_PreservesLocalResolver checks that updateMux
+// leaves the shared local resolver running across reconfiguration. The
+// resolver is a process-lifetime singleton reused by every config update;
+// Stop() cancels its lookup context (breaking external CNAME-target
+// resolution) and clears its records, so updateMux may only deregister the
+// resolver's chain entries. A record that still resolves after a teardown
+// update is the observable proxy: Stop() would have wiped it.
+func TestDefaultServer_UpdateMux_PreservesLocalResolver(t *testing.T) {
+	resolver := local.NewResolver()
+	record := nbdns.SimpleRecord{
+		Name:  "peer.netbird.cloud.",
+		Type:  int(dns.TypeA),
+		Class: nbdns.DefaultClass,
+		TTL:   300,
+		RData: "10.0.0.1",
+	}
+	require.NoError(t, resolver.RegisterRecord(record))
+
+	server := &DefaultServer{
+		handlerChain:  NewHandlerChain(),
+		service:       &mockService{},
+		localResolver: resolver,
+	}
+
+	server.updateMux([]handlerWrapper{{domain: "netbird.cloud", handler: resolver, priority: PriorityLocal}})
+
+	// Drop the zone; the resolver must keep its records and lookup context.
+	server.updateMux(nil)
+
+	var response *dns.Msg
+	writer := &test.MockResponseWriter{
+		WriteMsgFunc: func(m *dns.Msg) error {
+			response = m
+			return nil
+		},
+	}
+	question := dns.Question{Name: "peer.netbird.cloud.", Qtype: dns.TypeA, Qclass: dns.ClassINET}
+	resolver.ServeDNS(writer, &dns.Msg{Question: []dns.Question{question}})
+
+	require.NotNil(t, response, "local resolver should answer after teardown")
+	assert.Equal(t, dns.RcodeSuccess, response.Rcode,
+		"local resolver records must survive teardown; updateMux must not Stop() the shared resolver")
+	assert.NotEmpty(t, response.Answer, "answer should contain the surviving record")
+}
+
 func TestExtraDomains(t *testing.T) {
 	tests := []struct {
 		name                string
@@ -2049,7 +2136,6 @@ func TestBuildUpstreamHandler_MergesGroupsPerDomain(t *testing.T) {
 		localResolver: local.NewResolver(),
 		handlerChain:  NewHandlerChain(),
 		hostManager:   &noopHostConfigurator{},
-		dnsMuxMap:     make(registeredHandlerMap),
 	}

 	groups := []*nbdns.NameServerGroup{
@@ -2207,7 +2293,7 @@ func TestEvaluateNSGroupHealth(t *testing.T) {
 	}
 }

-// healthStubHandler is a minimal dnsMuxMap entry that exposes a fixed
+// healthStubHandler is a minimal dnsMuxHandlers entry that exposes a fixed
 // UpstreamHealth snapshot, letting tests drive recomputeNSGroupStates
 // without spinning up real handlers.
 type healthStubHandler struct {
@@ -2283,12 +2369,11 @@ func newProjTestFixture(t *testing.T) *projTestFixture {
 		ctx:              context.Background(),
 		wgInterface:      &mocWGIface{},
 		statusRecorder:   recorder,
-		dnsMuxMap:        make(registeredHandlerMap),
 		selectedRoutes:   func() route.HAMap { return fx.selected },
 		activeRoutes:     func() route.HAMap { return fx.active },
 		warningDelayBase: defaultWarningDelayBase,
 	}
-	fx.server.dnsMuxMap["example.com"] = handlerWrapper{domain: "example.com", handler: fx.stub, priority: PriorityUpstream}
+	fx.server.dnsMuxHandlers = []handlerWrapper{{domain: "example.com", handler: fx.stub, priority: PriorityUpstream}}

 	fx.server.mux.Lock()
 	fx.server.updateNSGroupStates([]*nbdns.NameServerGroup{fx.group})
@@ -2395,7 +2480,6 @@ func TestProjection_OverlayAddrNoRouteDelaysWarning(t *testing.T) {
 		ctx:              context.Background(),
 		wgInterface:      &mocWGIface{},
 		statusRecorder:   recorder,
-		dnsMuxMap:        make(registeredHandlerMap),
 		selectedRoutes:   func() route.HAMap { return nil },
 		activeRoutes:     func() route.HAMap { return nil },
 		warningDelayBase: 50 * time.Millisecond,
@@ -2407,7 +2491,7 @@ func TestProjection_OverlayAddrNoRouteDelaysWarning(t *testing.T) {
 	stub := &healthStubHandler{health: map[netip.AddrPort]UpstreamHealth{
 		overlayPeer: {LastFail: time.Now(), LastErr: "timeout"},
 	}}
-	server.dnsMuxMap["example.com"] = handlerWrapper{domain: "example.com", handler: stub, priority: PriorityUpstream}
+	server.dnsMuxHandlers = []handlerWrapper{{domain: "example.com", handler: stub, priority: PriorityUpstream}}

 	server.mux.Lock()
 	server.updateNSGroupStates([]*nbdns.NameServerGroup{group})
@@ -2444,7 +2528,6 @@ func TestProjection_StopClearsHealthState(t *testing.T) {
 		service:           NewServiceViaMemory(wgIface),
 		hostManager:       &noopHostConfigurator{},
 		extraDomains:      map[domain.Domain]int{},
-		dnsMuxMap:         make(registeredHandlerMap),
 		statusRecorder:    peer.NewRecorder("mgm"),
 		selectedRoutes:    func() route.HAMap { return nil },
 		activeRoutes:      func() route.HAMap { return nil },
@@ -2459,7 +2542,7 @@ func TestProjection_StopClearsHealthState(t *testing.T) {
 		NameServers: []nbdns.NameServer{{IP: srv.Addr(), NSType: nbdns.UDPNameServerType, Port: int(srv.Port())}},
 	}
 	stub := &healthStubHandler{health: map[netip.AddrPort]UpstreamHealth{srv: {LastOk: time.Now()}}}
-	server.dnsMuxMap["example.com"] = handlerWrapper{domain: "example.com", handler: stub, priority: PriorityUpstream}
+	server.dnsMuxHandlers = []handlerWrapper{{domain: "example.com", handler: stub, priority: PriorityUpstream}}

 	server.mux.Lock()
 	server.updateNSGroupStates([]*nbdns.NameServerGroup{group})
@@ -2484,6 +2567,32 @@ func TestProjection_StopClearsHealthState(t *testing.T) {
 // rule 3: startup failures while the peer is handshaking, then the peer
 // comes up and a query succeeds before the grace window elapses. No
 // warning should ever have fired, and no recovery either.
+func TestWarningDelayBaseFromEnv(t *testing.T) {
+	// Each case sets (or leaves unset) envWarningDelay and states the base it must yield.
+	tests := []struct {
+		name string
+		set  bool
+		val  string
+		want time.Duration
+	}{
+		{name: "unset uses default", set: false, want: defaultWarningDelayBase},
+		{name: "valid override", set: true, val: "90s", want: 90 * time.Second},
+		{name: "valid minutes", set: true, val: "2m", want: 2 * time.Minute},
+		{name: "invalid falls back", set: true, val: "notaduration", want: defaultWarningDelayBase},
+		{name: "zero falls back", set: true, val: "0s", want: defaultWarningDelayBase},
+		{name: "negative falls back", set: true, val: "-30s", want: defaultWarningDelayBase},
+	}
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Setenv(envWarningDelay, tc.val) // also restores the previous value on cleanup
+			if !tc.set {
+				require.NoError(t, os.Unsetenv(envWarningDelay), "unset %s", envWarningDelay)
+			}
+			assert.Equal(t, tc.want, warningDelayBaseFromEnv(), "grace window base")
+		})
+	}
+}
+
 func TestProjection_OverlayRecoversDuringGrace(t *testing.T) {
 	fx := newProjTestFixture(t)
 	fx.server.warningDelayBase = 200 * time.Millisecond
@@ -2595,7 +2704,6 @@ func TestProjection_MixedGroupEmitsImmediately(t *testing.T) {
 	server := &DefaultServer{
 		ctx:              context.Background(),
 		statusRecorder:   recorder,
-		dnsMuxMap:        make(registeredHandlerMap),
 		selectedRoutes:   func() route.HAMap { return overlayMap },
 		activeRoutes:     func() route.HAMap { return nil },
 		warningDelayBase: time.Hour,
@@ -2613,7 +2721,7 @@ func TestProjection_MixedGroupEmitsImmediately(t *testing.T) {
 			overlay: {LastFail: time.Now(), LastErr: "timeout"},
 		},
 	}
-	server.dnsMuxMap["example.com"] = handlerWrapper{domain: "example.com", handler: stub, priority: PriorityUpstream}
+	server.dnsMuxHandlers = []handlerWrapper{{domain: "example.com", handler: stub, priority: PriorityUpstream}}

 	server.mux.Lock()
 	server.updateNSGroupStates([]*nbdns.NameServerGroup{group})
@@ -2640,7 +2748,6 @@ func TestDNSLoopPrevention(t *testing.T) {
 		localResolver: local.NewResolver(),
 		handlerChain:  NewHandlerChain(),
 		hostManager:   &noopHostConfigurator{},
-		dnsMuxMap:     make(registeredHandlerMap),
 	}

 	tests := []struct {
--- a/client/internal/dns/upstream.go
+++ b/client/internal/dns/upstream.go
@@ -443,29 +443,32 @@ func (u *upstreamResolverBase) queryUpstream(parentCtx context.Context, r *dns.M
 		return raceResult{}, &upstreamFailure{upstream: upstream, reason: "no response"}
 	}

+	// A valid response means the upstream is reachable, whatever the Rcode.
+	u.markUpstreamOk(upstream)
+
 	proto := ""
 	if upstreamProto != nil {
 		proto = upstreamProto.protocol
 	}

 	if rm.Rcode == dns.RcodeServerFailure || rm.Rcode == dns.RcodeRefused {
+		// SERVFAIL and REFUSED are per-question outcomes (DNSSEC-bogus names,
+		// refused zones, transient recursion errors), not reachability
+		// problems: fail over for a better answer but keep the upstream healthy.
 		if code, ok := nonRetryableEDE(rm); ok {
 			if !hadEdns {
-				stripOPT(rm)
+				resutil.StripOPT(rm)
 			}
-			u.markUpstreamOk(upstream)
 			return raceResult{msg: rm, upstream: upstream, protocol: proto, ede: edeName(code)}, nil
 		}
 		reason := dns.RcodeToString[rm.Rcode]
-		u.markUpstreamFail(upstream, reason)
 		return raceResult{}, &upstreamFailure{upstream: upstream, reason: reason}
 	}

 	if !hadEdns {
-		stripOPT(rm)
+		resutil.StripOPT(rm)
 	}

-	u.markUpstreamOk(upstream)
 	return raceResult{msg: rm, upstream: upstream, protocol: proto}, nil
 }

@@ -520,22 +523,6 @@ func upstreamUDPSize() uint16 {
 	return dns.MinMsgSize
 }

-// stripOPT removes any OPT pseudo-RRs from the response's Extra section so
-// the response complies with RFC 6891 when the client did not advertise EDNS0.
-func stripOPT(rm *dns.Msg) {
-	if len(rm.Extra) == 0 {
-		return
-	}
-	out := rm.Extra[:0]
-	for _, rr := range rm.Extra {
-		if _, ok := rr.(*dns.OPT); ok {
-			continue
-		}
-		out = append(out, rr)
-	}
-	rm.Extra = out
-}
-
 func (u *upstreamResolverBase) handleUpstreamError(err error, upstream netip.AddrPort, startTime time.Time) *upstreamFailure {
 	if !errors.Is(err, context.DeadlineExceeded) && !isTimeout(err) {
 		return &upstreamFailure{upstream: upstream, reason: err.Error()}
--- a/client/internal/dns/upstream_test.go
+++ b/client/internal/dns/upstream_test.go
@@ -517,6 +517,78 @@ func TestUpstreamResolver_HealthTracking(t *testing.T) {
 	assert.NotContains(t, health, bad, "sibling upstream should not be queried when primary answers")
 }

+// TestUpstreamResolver_HealthTracking_ResponseMeansReachable verifies that an
+// upstream answering with SERVFAIL or REFUSED is still recorded as healthy:
+// those are per-question outcomes from a reachable server and must not mark
+// the upstream unhealthy. In this test only a timeout error marks it failed.
+func TestUpstreamResolver_HealthTracking_ResponseMeansReachable(t *testing.T) {
+	a := netip.MustParseAddrPort("192.0.2.10:53")
+	b := netip.MustParseAddrPort("192.0.2.11:53")
+	timeoutErr := &net.OpError{Op: "read", Err: fmt.Errorf("i/o timeout")}
+
+	tests := []struct {
+		name        string
+		respA       mockUpstreamResponse
+		respB       mockUpstreamResponse
+		wantHealthy bool // expected health of upstream a after one query
+	}{
+		{
+			name:        "both SERVFAIL are reachable",
+			respA:       mockUpstreamResponse{msg: buildMockResponse(dns.RcodeServerFailure, "")},
+			respB:       mockUpstreamResponse{msg: buildMockResponse(dns.RcodeServerFailure, "")},
+			wantHealthy: true,
+		},
+		{
+			name:        "both REFUSED are reachable",
+			respA:       mockUpstreamResponse{msg: buildMockResponse(dns.RcodeRefused, "")},
+			respB:       mockUpstreamResponse{msg: buildMockResponse(dns.RcodeRefused, "")},
+			wantHealthy: true,
+		},
+		{
+			name:        "timeout marks unhealthy",
+			respA:       mockUpstreamResponse{err: timeoutErr},
+			respB:       mockUpstreamResponse{err: timeoutErr},
+			wantHealthy: false,
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			mockClient := &mockUpstreamResolverPerServer{
+				responses: map[string]mockUpstreamResponse{
+					a.String(): tc.respA,
+					b.String(): tc.respB,
+				},
+				rtt: time.Millisecond,
+			}
+
+			ctx, cancel := context.WithCancel(context.Background())
+			defer cancel()
+
+			resolver := &upstreamResolverBase{
+				ctx:             ctx,
+				upstreamClient:  mockClient,
+				upstreamTimeout: UpstreamTimeout,
+			}
+			resolver.addRace([]netip.AddrPort{a, b})
+
+			responseWriter := &test.MockResponseWriter{WriteMsgFunc: func(m *dns.Msg) error { return nil }}
+			resolver.ServeDNS(responseWriter, new(dns.Msg).SetQuestion("example.com.", dns.TypeA))
+
+			health := resolver.UpstreamHealth()
+			require.Contains(t, health, a, "primary upstream should have a health record") // only a is inspected below
+			if tc.wantHealthy {
+				assert.False(t, health[a].LastOk.IsZero(), "responding upstream should have LastOk set")
+				assert.True(t, health[a].LastFail.IsZero(), "responding upstream should not be marked failed")
+				assert.Empty(t, health[a].LastErr, "responding upstream should have no error")
+			} else {
+				assert.False(t, health[a].LastFail.IsZero(), "timed-out upstream should be marked failed")
+				assert.NotEmpty(t, health[a].LastErr, "timed-out upstream should record an error")
+			}
+		})
+	}
+}
+
 func TestFormatFailures(t *testing.T) {
 	testCases := []struct {
 		name     string
@@ -913,19 +985,6 @@ func TestEDEName(t *testing.T) {
 	assert.Equal(t, "EDE 9999", edeName(9999), "unknown code falls back to numeric")
 }

-func TestStripOPT(t *testing.T) {
-	rm := &dns.Msg{
-		Extra: []dns.RR{
-			&dns.OPT{Hdr: dns.RR_Header{Name: ".", Rrtype: dns.TypeOPT}},
-			&dns.A{Hdr: dns.RR_Header{Name: "x.", Rrtype: dns.TypeA}, A: net.IPv4(1, 2, 3, 4)},
-		},
-	}
-	stripOPT(rm)
-	assert.Len(t, rm.Extra, 1, "OPT should be removed, A kept")
-	_, isOPT := rm.Extra[0].(*dns.OPT)
-	assert.False(t, isOPT, "remaining record must not be OPT")
-}
-
 func TestUpstreamResolver_NonRetryableEDEShortCircuits(t *testing.T) {
 	upstream1 := netip.MustParseAddrPort("192.0.2.1:53")
 	upstream2 := netip.MustParseAddrPort("192.0.2.2:53")
--- a/client/internal/dnsfwd/forwarder.go
+++ b/client/internal/dnsfwd/forwarder.go
@@ -26,6 +26,15 @@ import (
 const errResolveFailed = "failed to resolve query for domain=%s: %v"
 const upstreamTimeout = 15 * time.Second

+// EDE info codes the forwarder emits on upstream failures so the querying
+// client can see the reason without inspecting this peer's logs. They live in
+// the RFC 8914 Private Use range (49152-65535); the Go resolver never exposes a
+// real upstream EDE here, so these cannot collide with a genuine code.
+const (
+	edeNetbirdUpstreamTimeout uint16 = 49152 // returned by edeCodeFor when dnsErr.IsTimeout is set
+	edeNetbirdUpstreamFailure uint16 = 49153 // returned by edeCodeFor for every other lookup error
+)
+
 type resolver interface {
 	LookupNetIP(ctx context.Context, network, host string) ([]netip.Addr, error)
 }
@@ -220,7 +229,7 @@ func (f *DNSForwarder) handleDNSQuery(logger *log.Entry, w dns.ResponseWriter, q

 	result := resutil.LookupIP(ctx, f.resolver, network, qname, question.Qtype)
 	if result.Err != nil {
-		f.handleDNSError(ctx, logger, w, question, resp, qname, result, startTime)
+		f.handleDNSError(ctx, logger, w, question, resp, qname, result, query.IsEdns0() != nil, startTime)
 		return
 	}

@@ -333,6 +342,7 @@ func (f *DNSForwarder) handleDNSError(
 	resp *dns.Msg,
 	domain string,
 	result resutil.LookupResult,
+	reqHasEdns bool,
 	startTime time.Time,
 ) {
 	qType := question.Qtype
@@ -374,6 +384,10 @@ func (f *DNSForwarder) handleDNSError(
 		logger.Warnf(errResolveFailed, domain, result.Err)
 	}

+	if reqHasEdns {
+		attachEDE(resp, edeCodeFor(dnsErr), edeText(dnsErr))
+	}
+
 	f.writeResponse(logger, w, resp, domain, startTime)
 }

@@ -414,3 +428,33 @@ func (f *DNSForwarder) getMatchingEntries(domain string) (route.ResID, []*Forwar

 	return selectedResId, matches
 }
+
+// edeCodeFor picks the NetBird EDE info code matching an upstream lookup error.
+func edeCodeFor(dnsErr *net.DNSError) uint16 {
+	if dnsErr == nil || !dnsErr.IsTimeout {
+		return edeNetbirdUpstreamFailure
+	}
+	return edeNetbirdUpstreamTimeout
+}
+
+// edeText returns the EDE extra-text naming the class of upstream failure.
+// The upstream server address is left out on purpose: it may be an internal
+// resolver, and the text is visible to any client permitted to use the route.
+// The full detail stays in the forwarder's local log.
+func edeText(dnsErr *net.DNSError) string {
+	if dnsErr == nil || !dnsErr.IsTimeout {
+		return "netbird forwarder: upstream failure"
+	}
+	return "netbird forwarder: upstream timeout"
+}
+
+// attachEDE appends an Extended DNS Error (RFC 8914) option to resp. When
+// resp has no OPT pseudo-record yet, one is created first via SetEdns0.
+func attachEDE(resp *dns.Msg, code uint16, text string) {
+	if resp.IsEdns0() == nil {
+		resp.SetEdns0(dns.DefaultMsgSize, false)
+	}
+	ede := &dns.EDNS0_EDE{InfoCode: code, ExtraText: text}
+	opt := resp.IsEdns0()
+	opt.Option = append(opt.Option, ede)
+}
--- a/client/internal/dnsfwd/forwarder_test.go
+++ b/client/internal/dnsfwd/forwarder_test.go
@@ -16,6 +16,7 @@ import (
 	"github.com/stretchr/testify/require"

 	firewall "github.com/netbirdio/netbird/client/firewall/manager"
+	"github.com/netbirdio/netbird/client/internal/dns/resutil"
 	"github.com/netbirdio/netbird/client/internal/dns/test"
 	"github.com/netbirdio/netbird/client/internal/peer"
 	"github.com/netbirdio/netbird/route"
@@ -617,6 +618,85 @@ func TestDNSForwarder_ResponseCodes(t *testing.T) {
 	}
 }

+// TestDNSForwarder_UpstreamFailureEDE checks when a SERVFAIL reply carries the NetBird EDE option.
+func TestDNSForwarder_UpstreamFailureEDE(t *testing.T) {
+	cases := []struct {
+		name        string
+		lookupErr   error
+		reqEdns     bool
+		wantEDE     bool
+		wantCode    uint16
+		wantTextHas string
+	}{
+		{
+			name:        "timeout with edns0",
+			lookupErr:   &net.DNSError{Err: "i/o timeout", Server: "10.0.0.53:53", IsTimeout: true},
+			reqEdns:     true,
+			wantEDE:     true,
+			wantCode:    edeNetbirdUpstreamTimeout,
+			wantTextHas: "netbird forwarder: upstream timeout",
+		},
+		{
+			name:        "server failure with edns0",
+			lookupErr:   &net.DNSError{Err: "server misbehaving", Server: "10.0.0.53:53"},
+			reqEdns:     true,
+			wantEDE:     true,
+			wantCode:    edeNetbirdUpstreamFailure,
+			wantTextHas: "netbird forwarder: upstream failure",
+		},
+		{
+			name:      "no edns0 in request omits ede",
+			lookupErr: &net.DNSError{Err: "server misbehaving", Server: "10.0.0.53:53"},
+			reqEdns:   false,
+			wantEDE:   false,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			upstream := &MockResolver{}
+			upstream.On("LookupNetIP", mock.Anything, "ip4", "example.com.").
+				Return([]netip.Addr(nil), tc.lookupErr).Once()
+
+			fwd := NewDNSForwarder(netip.MustParseAddrPort("127.0.0.1:0"), 300, nil, &peer.Status{}, nil)
+			fwd.resolver = upstream
+
+			zone, err := domain.FromString("example.com")
+			require.NoError(t, err)
+			fwd.UpdateDomains([]*ForwarderEntry{{Domain: zone, ResID: "test-res"}})
+
+			query := new(dns.Msg).SetQuestion("example.com.", dns.TypeA)
+			if tc.reqEdns {
+				query.SetEdns0(dns.DefaultMsgSize, false)
+			}
+
+			var got *dns.Msg
+			writer := &test.MockResponseWriter{
+				WriteMsgFunc: func(m *dns.Msg) error {
+					got = m
+					return nil
+				},
+			}
+
+			fwd.handleDNSQuery(log.NewEntry(log.StandardLogger()), writer, query, time.Now())
+			upstream.AssertExpectations(t)
+
+			require.NotNil(t, got, "expected a response")
+			assert.Equal(t, dns.RcodeServerFailure, got.Rcode, "upstream failure must be SERVFAIL")
+
+			ede, hasEDE := resutil.ExtractEDE(got)
+			if !tc.wantEDE {
+				assert.False(t, hasEDE, "response must not carry EDE")
+				return
+			}
+			require.True(t, hasEDE, "response must carry EDE")
+			assert.Equal(t, tc.wantCode, ede.InfoCode, "EDE info code")
+			assert.Contains(t, ede.ExtraText, tc.wantTextHas, "EDE extra-text")
+			assert.NotContains(t, ede.ExtraText, "10.0.0.53", "must not leak upstream server address")
+		})
+	}
+}
+
 func TestDNSForwarder_TCPTruncation(t *testing.T) {
 	// Test that large UDP responses are truncated with TC bit set
 	mockResolver := &MockResolver{}
--- a/client/internal/engine.go
+++ b/client/internal/engine.go
@@ -86,6 +86,8 @@ const (

 var ErrResetConnection = fmt.Errorf("reset connection")

+var ErrEngineAlreadyStarted = errors.New("engine already started")
+
 type EngineConfig struct {
 	WgPort      int
 	WgIfaceName string
@@ -199,6 +201,8 @@ type Engine struct {
 	ctx    context.Context
 	cancel context.CancelFunc

+	started bool
+
 	wgInterface WGIface

 	udpMux *udpmux.UniversalUDPMuxDefault
@@ -279,9 +283,15 @@ func NewEngine(
 	services EngineServices,
 	mobileDep MobileDependency,
 ) *Engine {
+	// The engine is single-use: a fresh instance is built per connection
+	// cycle (see Client.run), so the run context is created once here rather
+	// than in Start.
+	ctx, cancel := context.WithCancel(clientCtx)
 	engine := &Engine{
 		clientCtx:          clientCtx,
 		clientCancel:       clientCancel,
+		ctx:                ctx,
+		cancel:             cancel,
 		signal:             services.SignalClient,
 		signaler:           peer.NewSignaler(services.SignalClient, config.WgPrivateKey),
 		mgmClient:          services.MgmClient,
@@ -314,8 +324,34 @@ func (e *Engine) Stop() error {
 		log.Debugf("tried stopping engine that is nil")
 		return nil
 	}
+	e.cancel()
 	e.syncMsgMux.Lock()

+	e.stopLocked()
+
+	e.syncMsgMux.Unlock()
+
+	timeout := e.calculateShutdownTimeout()
+	log.Debugf("waiting for goroutines to finish with timeout: %v", timeout)
+	shutdownCtx, cancel := context.WithTimeout(context.Background(), timeout)
+	defer cancel()
+
+	if err := waitWithContext(shutdownCtx, &e.shutdownWg); err != nil {
+		log.Warnf("shutdown timeout exceeded after %v, some goroutines may still be running", timeout)
+	}
+
+	log.Infof("stopped Netbird Engine")
+
+	return nil
+}
+
+// stopLocked tears down everything Start may have brought up, in the order
+// teardown requires (DNS before the interface goes down, flow manager after).
+// The caller must hold syncMsgMux. It is shared by Stop and by Start's failure
+// path, so a partially-initialized engine is cleaned up the same way; every
+// step is nil-guarded. It does not wait on shutdownWg — the caller does that
+// after releasing the lock, since the goroutines also take syncMsgMux.
+func (e *Engine) stopLocked() {
 	if e.connMgr != nil {
 		e.connMgr.Close()
 	}
@@ -366,10 +402,6 @@ func (e *Engine) Stop() error {
 	// so dbus and friends don't complain because of a missing interface
 	e.stopDNSServer()

-	if e.cancel != nil {
-		e.cancel()
-	}
-
 	e.jobExecutorWG.Wait() // block until job goroutines finish

 	e.close()
@@ -388,21 +420,6 @@ func (e *Engine) Stop() error {
 	if err := e.stateManager.PersistState(context.Background()); err != nil {
 		log.Errorf("failed to persist state: %v", err)
 	}
-
-	e.syncMsgMux.Unlock()
-
-	timeout := e.calculateShutdownTimeout()
-	log.Debugf("waiting for goroutines to finish with timeout: %v", timeout)
-	shutdownCtx, cancel := context.WithTimeout(context.Background(), timeout)
-	defer cancel()
-
-	if err := waitWithContext(shutdownCtx, &e.shutdownWg); err != nil {
-		log.Warnf("shutdown timeout exceeded after %v, some goroutines may still be running", timeout)
-	}
-
-	log.Infof("stopped Netbird Engine")
-
-	return nil
 }

 // calculateShutdownTimeout returns shutdown timeout: 10s base + 100ms per peer, capped at 30s.
@@ -440,18 +457,38 @@ func waitWithContext(ctx context.Context, wg *sync.WaitGroup) error {
 // Start creates a new WireGuard tunnel interface and listens to events from Signal and Management services
 // Connections to remote peers are not established here.
 // However, they will be established once an event with a list of peers to connect to will be received from Management Service
-func (e *Engine) Start(netbirdConfig *mgmProto.NetbirdConfig, mgmtURL *url.URL) error {
+func (e *Engine) Start(netbirdConfig *mgmProto.NetbirdConfig, mgmtURL *url.URL) (err error) {
 	e.syncMsgMux.Lock()
 	defer e.syncMsgMux.Unlock()

-	if err := iface.ValidateMTU(e.config.MTU); err != nil {
+	// The engine is single-use. Reject a duplicate start and a start on an
+	// already-stopped engine (run context cancelled).
+	if e.started {
+		return ErrEngineAlreadyStarted
+	}
+
+	if ctxErr := e.ctx.Err(); ctxErr != nil {
+		return fmt.Errorf("engine already stopped: %w", ctxErr)
+	}
+
+	e.started = true
+
+	// Tear down any partially-initialized state on a failed start. Cancel the
+	// run context first so goroutines started before the failure (connMgr,
+	// srWatcher, monitors) unwind, then stopLocked mirrors Stop's teardown (we
+	// already hold syncMsgMux), cleaning up route/DNS/flow/state managers too,
+	// not just what close() covers.
+	defer func() {
+		if err != nil {
+			e.cancel()
+			e.stopLocked()
+		}
+	}()
+
+	if err = iface.ValidateMTU(e.config.MTU); err != nil {
 		return fmt.Errorf("invalid MTU configuration: %w", err)
 	}

-	if e.cancel != nil {
-		e.cancel()
-	}
-	e.ctx, e.cancel = context.WithCancel(e.clientCtx)
 	e.exposeManager = expose.NewManager(e.ctx, e.mgmClient)

 	wgIface, err := e.newWgIface()
@@ -485,13 +522,11 @@ func (e *Engine) Start(netbirdConfig *mgmProto.NetbirdConfig, mgmtURL *url.URL)

 	initialRoutes, dnsConfig, dnsFeatureFlag, err := e.readInitialSettings()
 	if err != nil {
-		e.close()
 		return fmt.Errorf("read initial settings: %w", err)
 	}

 	dnsServer, err := e.newDnsServer(dnsConfig)
 	if err != nil {
-		e.close()
 		return fmt.Errorf("create dns server: %w", err)
 	}
 	e.dnsServer = dnsServer
@@ -526,7 +561,6 @@ func (e *Engine) Start(netbirdConfig *mgmProto.NetbirdConfig, mgmtURL *url.URL)

 	if err = e.wgInterfaceCreate(); err != nil {
 		log.Errorf("failed creating tunnel interface %s: [%s]", e.config.WgIfaceName, err.Error())
-		e.close()
 		return fmt.Errorf("create wg interface: %w", err)
 	}

@@ -535,7 +569,6 @@ func (e *Engine) Start(netbirdConfig *mgmProto.NetbirdConfig, mgmtURL *url.URL)
 	}

 	if err := e.createFirewall(); err != nil {
-		e.close()
 		return err
 	}

@@ -547,7 +580,6 @@ func (e *Engine) Start(netbirdConfig *mgmProto.NetbirdConfig, mgmtURL *url.URL)
 	e.udpMux, err = e.wgInterface.Up()
 	if err != nil {
 		log.Errorf("failed to pull up wgInterface [%s]: %s", e.wgInterface.Name(), err.Error())
-		e.close()
 		return fmt.Errorf("up wg interface: %w", err)
 	}

@@ -572,9 +604,7 @@ func (e *Engine) Start(netbirdConfig *mgmProto.NetbirdConfig, mgmtURL *url.URL)
 		e.acl = acl.NewDefaultManager(e.firewall)
 	}

-	err = e.dnsServer.Initialize()
-	if err != nil {
-		e.close()
+	if err := e.dnsServer.Initialize(); err != nil {
 		return fmt.Errorf("initialize dns server: %w", err)
 	}

@@ -586,7 +616,9 @@ func (e *Engine) Start(netbirdConfig *mgmProto.NetbirdConfig, mgmtURL *url.URL)
 	e.srWatcher = guard.NewSRWatcher(e.signal, e.relayManager, e.mobileDep.IFaceDiscover, iceCfg)
 	e.srWatcher.Start(peer.IsForceRelayed())

-	e.receiveSignalEvents()
+	if err = e.receiveSignalEvents(); err != nil {
+		return err
+	}
 	e.receiveManagementEvents()
 	e.receiveJobEvents()

@@ -638,7 +670,6 @@ func (e *Engine) createFirewall() error {

 func (e *Engine) initFirewall() error {
 	if err := e.routeManager.SetFirewall(e.firewall); err != nil {
-		e.close()
 		return fmt.Errorf("set firewall: %w", err)
 	}

@@ -1698,7 +1729,7 @@ func (e *Engine) createPeerConn(pubKey string, allowedIPs []netip.Prefix, agentV
 }

 // receiveSignalEvents connects to the Signal Service event stream to negotiate connection with remote peers
-func (e *Engine) receiveSignalEvents() {
+func (e *Engine) receiveSignalEvents() error {
 	e.shutdownWg.Add(1)
 	go func() {
 		defer e.shutdownWg.Done()
@@ -1769,7 +1800,12 @@ func (e *Engine) receiveSignalEvents() {
 		}
 	}()

-	e.signal.WaitStreamConnected()
+	// TODO: consider removing this blocking wait; it is unclear what Start gains from blocking here.
+	e.signal.WaitStreamConnected(e.ctx)
+	if err := e.ctx.Err(); err != nil {
+		return fmt.Errorf("wait for signal stream: %w", err)
+	}
+	return nil
 }

 func (e *Engine) parseNATExternalIPMappings() []string {
--- a/client/internal/engine_test.go
+++ b/client/internal/engine_test.go
@@ -247,7 +247,7 @@ func TestEngine_SSH(t *testing.T) {
 		return
 	}

-	ctx, cancel := context.WithCancel(context.Background())
+	ctx, cancel := context.WithCancel(CtxInitState(context.Background()))
 	defer cancel()

 	relayMgr := relayClient.NewManager(ctx, nil, key.PublicKey().String(), iface.DefaultMTU)
@@ -426,7 +426,7 @@ func TestEngine_UpdateNetworkMap(t *testing.T) {
 		return
 	}

-	ctx, cancel := context.WithCancel(context.Background())
+	ctx, cancel := context.WithCancel(CtxInitState(context.Background()))
 	defer cancel()

 	relayMgr := relayClient.NewManager(ctx, nil, key.PublicKey().String(), iface.DefaultMTU)
@@ -638,7 +638,7 @@ func TestEngine_Sync(t *testing.T) {
 		return
 	}

-	ctx, cancel := context.WithCancel(context.Background())
+	ctx, cancel := context.WithCancel(CtxInitState(context.Background()))
 	defer cancel()

 	// feed updates to Engine via mocked Management client
@@ -817,7 +817,7 @@ func TestEngine_UpdateNetworkMapWithRoutes(t *testing.T) {
 				return
 			}

-			ctx, cancel := context.WithCancel(context.Background())
+			ctx, cancel := context.WithCancel(CtxInitState(context.Background()))
 			defer cancel()

 			wgIfaceName := fmt.Sprintf("utun%d", 104+n)
@@ -1024,7 +1024,7 @@ func TestEngine_UpdateNetworkMapWithDNSUpdate(t *testing.T) {
 				return
 			}

-			ctx, cancel := context.WithCancel(context.Background())
+			ctx, cancel := context.WithCancel(CtxInitState(context.Background()))
 			defer cancel()

 			wgIfaceName := fmt.Sprintf("utun%d", 104+n)
--- a/client/internal/routemanager/dnsinterceptor/handler.go
+++ b/client/internal/routemanager/dnsinterceptor/handler.go
@@ -251,6 +251,14 @@ func (d *DnsInterceptor) ServeDNS(w dns.ResponseWriter, r *dns.Msg) {
 		r.MsgHdr.AuthenticatedData = true
 	}

+	// Advertise EDNS0 to the forwarder so it may return an Extended DNS Error
+	// describing why a lookup failed. The OPT is stripped from the reply when
+	// the original client did not request EDNS0.
+	hadEdns := r.IsEdns0() != nil
+	if !hadEdns {
+		r.SetEdns0(dns.DefaultMsgSize, false)
+	}
+
 	upstream := net.JoinHostPort(upstreamIP.String(), strconv.FormatUint(uint64(d.forwarderPort.Load()), 10))
 	ctx, cancel := context.WithTimeout(context.Background(), dnsTimeout)
 	defer cancel()
@@ -260,6 +268,13 @@ func (d *DnsInterceptor) ServeDNS(w dns.ResponseWriter, r *dns.Msg) {
 		return
 	}

+	if ede, ok := resutil.ExtractEDE(reply); ok {
+		resutil.SetMeta(w, "ede", fmt.Sprintf("%d %s", ede.InfoCode, ede.ExtraText))
+	}
+	if !hadEdns {
+		resutil.StripOPT(reply)
+	}
+
 	resutil.SetMeta(w, "peer", peerKey)

 	reply.Id = r.Id
--- a/client/ios/NetBirdSDK/login.go
+++ b/client/ios/NetBirdSDK/login.go
@@ -36,6 +36,7 @@ type URLOpener interface {
 // Auth can register or login new client
 type Auth struct {
 	ctx     context.Context
+	cancel  context.CancelFunc
 	config  *profilemanager.Config
 	cfgPath string
 }
@@ -51,8 +52,19 @@ func NewAuth(cfgPath string, mgmURL string) (*Auth, error) {
 		return nil, err
 	}

+	// Use a cancellable context so Stop() can abort an in-progress interactive
+	// login. The PKCE flow's WaitToken blocks (and keeps its loopback HTTP server
+	// bound to a port) until the OAuth callback arrives or the flow expires;
+	// cancelling the context unblocks WaitToken, which then shuts that server down
+	// and frees the port for the next login attempt. iOS runs login in the main-app
+	// process (decoupled from the network extension), so without this the server
+	// lingers after the user dismisses the browser and the next connect stalls
+	// trying to bind the same port.
+	ctx, cancel := context.WithCancel(context.Background())
+
 	return &Auth{
-		ctx:     context.Background(),
+		ctx:     ctx,
+		cancel:  cancel,
 		config:  cfg,
 		cfgPath: cfgPath,
 	}, nil
@@ -60,12 +72,24 @@ func NewAuth(cfgPath string, mgmURL string) (*Auth, error) {

 // NewAuthWithConfig instantiate Auth based on existing config
 func NewAuthWithConfig(ctx context.Context, config *profilemanager.Config) *Auth {
+	// Wrap the caller's context in a cancellable child and keep the cancel
+	// func on the Auth value, so the login can be cancelled through it.
+	ctx, cancel := context.WithCancel(ctx)
 	return &Auth{
 		ctx:    ctx,
+		cancel: cancel,
 		config: config,
 	}
 }

+// Stop cancels the Auth context in order to abort an interactive login that was
+// started via Login/LoginWithDeviceName. Cancelling unblocks the PKCE WaitToken,
+// which then shuts down its loopback HTTP server and releases the redirect port.
+// Calling Stop repeatedly, or while no login is in flight, is harmless.
+func (a *Auth) Stop() {
+	if a.cancel == nil {
+		return
+	}
+	a.cancel()
+}
+
 // SaveConfigIfSSOSupported test the connectivity with the management server by retrieving the server device flow info.
 // If it returns a flow info than save the configuration and return true. If it gets a codes.NotFound, it means that SSO
 // is not supported and returns false without saving the configuration. For other errors return false.
--- a/client/server/server.go
+++ b/client/server/server.go
@@ -993,6 +993,10 @@ func (s *Server) cleanupConnection() error {
 		return nil
 	}

+	// TODO: consider calling s.connectClient.Stop() instead of engine.Stop().
+	// actCancel() lets the run loop stop the engine too, so both stop it
+	// concurrently; ConnectClient.Stop cancels and waits for the run loop,
+	// making the run loop the sole owner of engine shutdown.
 	if engine != nil {
 		if err := engine.Stop(); err != nil {
 			return err
--- a/infrastructure_files/getting-started-enterprise.sh
+++ b/infrastructure_files/getting-started-enterprise.sh
@@ -0,0 +1,616 @@
+#!/bin/bash
+
+set -e
+set -o pipefail
+
+# NetBird Enterprise — Getting Started
+# Single-node bootstrap for a self-hosted NetBird Enterprise stack with the
+# embedded identity provider. Owner is created via first-login flow.
+
+SED_STRIP_PADDING='s/=//g'
+
+# check_docker_compose prints the Compose invocation usable on this host:
+# "docker-compose" when that command is found, otherwise "docker compose" when
+# the CLI subcommand responds. Exits with status 1 when neither is available.
+# Callers capture stdout, so diagnostics are written to stderr only.
+check_docker_compose() {
+  if command -v docker-compose &> /dev/null; then
+    echo "docker-compose"
+    return
+  fi
+  if docker compose --help &> /dev/null; then
+    echo "docker compose"
+    return
+  fi
+  # Duplicate fd 2 (>&2) instead of reopening /dev/stderr: reopening truncates a
+  # redirected log file and can fail when the device path is not writable.
+  echo "docker-compose is not installed or not in PATH. See https://docs.docker.com/engine/install/" >&2
+  exit 1
+}
+
+check_openssl() {
+  if ! command -v openssl &> /dev/null; then
+    echo "openssl is not installed or not in PATH." > /dev/stderr
+    exit 1
+  fi
+}
+
+# rand_secret prints 32 random bytes as base64 with every '=' removed
+# (the sed program comes from SED_STRIP_PADDING).
+rand_secret() {
+  openssl rand -base64 32 | sed "$SED_STRIP_PADDING"
+}
+
+# rand_b64_key prints 32 random bytes as base64, padding included.
+rand_b64_key() {
+  openssl rand -base64 32
+}
+
+# check_nb_domain validates the candidate domain passed as $1.
+# Returns 0 when it is acceptable; otherwise prints the reason on stderr and
+# returns 1. Rejected: empty value, the placeholder netbird.example.com,
+# anything made only of digits and dots, and anything that is not a
+# multi-label hostname of letters, digits and inner hyphens.
+check_nb_domain() {
+  local domain="$1"
+  # Diagnostics use >&2 (fd duplication); "> /dev/stderr" would reopen the
+  # device and truncate stderr when it is redirected to a file.
+  if [[ -z "$domain" ]]; then
+    echo "The domain cannot be empty." >&2
+    return 1
+  fi
+  if [[ "$domain" == "netbird.example.com" ]]; then
+    echo "The domain cannot be netbird.example.com" >&2
+    return 1
+  fi
+  if [[ "$domain" =~ ^[0-9.]+$ ]]; then
+    echo "An IP address is not allowed. A real DNS-resolvable domain is required for TLS and the embedded IdP issuer." >&2
+    return 1
+  fi
+  if [[ ! "$domain" =~ ^[A-Za-z0-9]([A-Za-z0-9-]*[A-Za-z0-9])?(\.[A-Za-z0-9]([A-Za-z0-9-]*[A-Za-z0-9])?)+$ ]]; then
+    echo "The value '$domain' is not a valid FQDN. A real DNS-resolvable domain is required for TLS and the embedded IdP issuer." >&2
+    return 1
+  fi
+  return 0
+}
+
+check_domain_resolves() {
+  local domain="$1"
+  if command -v getent &> /dev/null && getent hosts "$domain" &> /dev/null; then return 0; fi
+  if command -v host &> /dev/null && host "$domain" &> /dev/null; then return 0; fi
+  if command -v dig &> /dev/null && [[ -n "$(dig +short "$domain" 2>/dev/null)" ]]; then return 0; fi
+  if command -v nslookup &> /dev/null && nslookup "$domain" &> /dev/null; then return 0; fi
+  return 1
+}
+
+read_nb_domain() {
+  local value=""
+  echo -n "Enter the FQDN for NetBird (must resolve via DNS, e.g. netbird.my-domain.com): " > /dev/stderr
+  read -r value < /dev/tty
+  if ! check_nb_domain "$value"; then
+    read_nb_domain
+    return
+  fi
+  if ! check_domain_resolves "$value"; then
+    echo "" > /dev/stderr
+    echo "Warning: '$value' does not resolve via DNS from this host." > /dev/stderr
+    echo "Caddy will not be able to issue TLS certificates until it does." > /dev/stderr
+    local confirm=""
+    echo -n "Continue anyway? [y/N]: " > /dev/stderr
+    read -r confirm < /dev/tty
+    if [[ ! "$confirm" =~ ^[Yy]$ ]]; then
+      read_nb_domain
+      return
+    fi
+  fi
+  echo "$value"
+}
+
+read_required() {
+  local prompt="$1"
+  local value=""
+  while [[ -z "$value" ]]; do
+    echo -n "$prompt: " > /dev/stderr
+    read -r value < /dev/tty
+    if [[ -z "$value" ]]; then
+      echo "Value cannot be empty." > /dev/stderr
+    fi
+  done
+  echo "$value"
+}
+
+read_secret() {
+  local prompt="$1"
+  local value=""
+  while [[ -z "$value" ]]; do
+    echo -n "$prompt: " > /dev/stderr
+    read -rs value < /dev/tty
+    echo "" > /dev/stderr
+    if [[ -z "$value" ]]; then
+      echo "Value cannot be empty." > /dev/stderr
+    fi
+  done
+  echo "$value"
+}
+
+# read_yes_no "<prompt>" [<default y|n>]
+read_yes_no() {
+  local prompt="$1"
+  local default="${2:-n}"
+  local hint
+  if [[ "$default" == "y" ]]; then
+    hint="[Y/n]"
+  else
+    hint="[y/N]"
+  fi
+  echo -n "${prompt} ${hint}: " > /dev/stderr
+  local ans=""
+  read -r ans < /dev/tty
+  if [[ -z "$ans" ]]; then
+    ans="$default"
+  fi
+  case "$ans" in
+    [Yy] | [Yy][Ee][Ss]) echo "yes" ;;
+    *) echo "no" ;;
+  esac
+}
+
+# wait_postgres polls pg_isready inside the postgres service until it succeeds,
+# printing a progress dot and sleeping 2 seconds between attempts. When the 60th
+# attempt also fails it prints the last 20 postgres log lines and exits 1.
+# errexit is switched off while polling and switched back on before returning.
+wait_postgres() {
+  set +e
+  echo -n "Waiting for postgres to become ready"
+  local attempt=1
+  # DOCKER_COMPOSE_COMMAND is left unquoted on purpose: it may hold two words.
+  until $DOCKER_COMPOSE_COMMAND exec -T postgres pg_isready -U "$POSTGRES_USER" -d "$POSTGRES_DB" &> /dev/null; do
+    if [[ $attempt -eq 60 ]]; then
+      echo ""
+      echo "Postgres is taking too long. Recent logs:"
+      $DOCKER_COMPOSE_COMMAND logs --tail=20 postgres
+      exit 1
+    fi
+    echo -n " ."
+    sleep 2
+    attempt=$((attempt + 1))
+  done
+  echo " done"
+  set -e
+}
+
+# init_environment is the script's entry point. In order, it:
+#   1. verifies openssl and Docker Compose are available,
+#   2. refuses to continue if any generated file already exists in the cwd,
+#   3. prompts for traffic flow, domain, license key and GHCR token,
+#   4. generates the Postgres password, encryption key and relay secret,
+#   5. renders .env, docker-compose.yml, Caddyfile and config.yaml,
+#   6. logs in to ghcr.io, pulls images, starts postgres, waits for it, then
+#      starts the remaining services.
+init_environment() {
+  check_openssl
+  DOCKER_COMPOSE_COMMAND=$(check_docker_compose)
+
+  # Never overwrite a previous run: bail out when any output file is present.
+  if [[ -f .env ]] || [[ -f docker-compose.yml ]] || [[ -f config.yaml ]] || [[ -f Caddyfile ]]; then
+    echo "Generated files already exist in $(pwd)."
+    echo "If you want to reinitialize the environment, please remove them first:"
+    echo "  $DOCKER_COMPOSE_COMMAND down --volumes # removes all containers and volumes"
+    echo "  rm -f .env docker-compose.yml Caddyfile config.yaml"
+    echo "Be aware this will remove all data from the database."
+    exit 1
+  fi
+
+  echo "NetBird Enterprise bootstrap"
+  echo ""
+  echo "Traffic flow:"
+  echo "  Enables traffic events logging on the management server."
+  echo "  When enabled, the NetBird stack also runs NATS along with two"
+  echo "  additional containers: netbird-receiver (the traffic log receiver"
+  echo "  service) and netbird-enricher (the traffic log enricher service)."
+  echo "  It still has to be turned on from the dashboard settings afterwards."
+  echo "  See https://docs.netbird.io/manage/activity/traffic-events-logging"
+  # Holds the literal string "yes" or "no"; the renderers compare against "yes".
+  NETBIRD_TRAFFIC_FLOW=$(read_yes_no "Enable traffic flow" "n")
+
+  echo ""
+  NETBIRD_DOMAIN=$(read_nb_domain)
+
+  echo ""
+
+  NETBIRD_LICENSE_KEY=$(read_secret "Enter license key (input hidden)")
+
+  GHCR_USERNAME="netbirdExtAccess1"
+  GHCR_TOKEN=$(read_secret "Enter GHCR token (input hidden)")
+
+  POSTGRES_USER="netbird"
+  POSTGRES_DB="netbird"
+  POSTGRES_PASSWORD=$(rand_secret)
+  NETBIRD_ENCRYPTION_KEY=$(rand_b64_key)
+  NETBIRD_RELAY_AUTH_SECRET=$(rand_secret)
+
+  POSTGRES_DSN="host=postgres user=${POSTGRES_USER} password=${POSTGRES_PASSWORD} dbname=${POSTGRES_DB} port=5432 sslmode=disable TimeZone=UTC"
+  NETBIRD_RELAY_ENDPOINT="rels://${NETBIRD_DOMAIN}:443"
+
+  echo ""
+  echo "Selected:"
+  echo "  Traffic flow: ${NETBIRD_TRAFFIC_FLOW}"
+  echo "  Domain:       ${NETBIRD_DOMAIN}"
+  echo ""
+  echo "Rendering files into $(pwd) ..."
+  # Create the file empty with mode 600 first, then append, so the secrets are
+  # never written to a file with wider permissions.
+  install -m 600 /dev/null .env
+  render_env >> .env
+  render_docker_compose > docker-compose.yml
+
+  # With no license-server override set, drop every compose line that mentions
+  # the variable. -i.bak plus rm is used instead of a bare -i.
+  if [[ -z "${NETBIRD_LICENSE_SERVER_BASE_URL:-}" ]]; then
+    sed -i.bak '/NETBIRD_LICENSE_SERVER_BASE_URL/d' docker-compose.yml && rm -f docker-compose.yml.bak
+  fi
+  render_caddyfile > Caddyfile
+  # config.yaml is also created with mode 600 before its content is appended.
+  install -m 600 /dev/null config.yaml
+  render_config_yaml >> config.yaml
+
+  echo "Logging in to ghcr.io ..."
+  # The token is fed through stdin so it does not appear in the argument list.
+  printf '%s' "$GHCR_TOKEN" | docker login ghcr.io -u "$GHCR_USERNAME" --password-stdin
+  unset GHCR_TOKEN
+
+  echo ""
+  echo "Pulling images ..."
+  $DOCKER_COMPOSE_COMMAND pull
+
+  echo ""
+  echo "Starting postgres ..."
+  $DOCKER_COMPOSE_COMMAND up -d postgres
+  sleep 2
+  wait_postgres
+
+  echo ""
+  echo "Starting remaining services ..."
+  $DOCKER_COMPOSE_COMMAND up -d
+
+  echo ""
+  echo "Done."
+  echo ""
+  echo "Dashboard: https://${NETBIRD_DOMAIN}"
+  echo ""
+  echo "Open the dashboard in a browser to complete the first-login owner setup."
+  # NOTE(review): config.yaml (also mode 600) receives the relay secret, DSN and
+  # encryption key as well, yet the message below names only .env - confirm wording.
+  echo "All configuration and secrets are stored (mode 600) in $(pwd)/.env"
+  echo ""
+  echo "Tail logs:"
+  echo "  cd $(pwd) && $DOCKER_COMPOSE_COMMAND logs -f netbird-server caddy"
+}
+
+# ------------------------------------------------------------------
+# Renderers
+# ------------------------------------------------------------------
+
+# render_env prints the .env file content on stdout. The heredocs are unquoted,
+# so every ${...} is expanded by this script at render time. The enricher and
+# receiver image tags are emitted only when traffic flow is "yes", and the
+# license-server URL only when NETBIRD_LICENSE_SERVER_BASE_URL is non-empty.
+render_env() {
+  cat <<EOF
+# Generated by getting-started-enterprise.sh
+# Holds all configuration and secrets for the stack. Mode 600.
+
+# Features (set by the script; don't edit without re-running)
+NETBIRD_TRAFFIC_FLOW_ENABLED=${NETBIRD_TRAFFIC_FLOW}
+
+# Domain
+NETBIRD_DOMAIN=${NETBIRD_DOMAIN}
+
+# Image tags. Default to "latest"
+NETBIRD_DASHBOARD_TAG=${NETBIRD_DASHBOARD_TAG:-latest}
+NETBIRD_SERVER_TAG=${NETBIRD_SERVER_TAG:-latest}
+EOF
+
+  if [[ "$NETBIRD_TRAFFIC_FLOW" == "yes" ]]; then
+    cat <<EOF
+NETBIRD_ENRICHER_TAG=${NETBIRD_ENRICHER_TAG:-latest}
+NETBIRD_RECEIVER_TAG=${NETBIRD_RECEIVER_TAG:-latest}
+EOF
+  fi
+
+  cat <<EOF
+
+# License keys
+EOF
+  if [[ -n "${NETBIRD_LICENSE_SERVER_BASE_URL:-}" ]]; then
+    cat <<EOF
+NETBIRD_LICENSE_SERVER_BASE_URL=${NETBIRD_LICENSE_SERVER_BASE_URL}
+EOF
+  fi
+  cat <<EOF
+NETBIRD_LICENSE_KEY=${NETBIRD_LICENSE_KEY}
+EOF
+
+  cat <<EOF
+
+# Postgres
+POSTGRES_USER=${POSTGRES_USER}
+POSTGRES_DB=${POSTGRES_DB}
+POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
+NETBIRD_STORE_ENGINE_POSTGRES_DSN=${POSTGRES_DSN}
+
+# Relay
+NETBIRD_RELAY_ENDPOINT=${NETBIRD_RELAY_ENDPOINT}
+NETBIRD_RELAY_AUTH_SECRET=${NETBIRD_RELAY_AUTH_SECRET}
+
+# Datastore encryption
+NETBIRD_ENCRYPTION_KEY=${NETBIRD_ENCRYPTION_KEY}
+
+# Dashboard OIDC scopes
+NETBIRD_AUTH_SUPPORTED_SCOPES=${NETBIRD_AUTH_SUPPORTED_SCOPES:-openid profile email groups}
+EOF
+}
+
+render_docker_compose() {
+  render_compose_header
+  render_compose_common
+  render_compose_server
+  if [[ "$NETBIRD_TRAFFIC_FLOW" == "yes" ]]; then
+    render_compose_flow
+  fi
+  render_compose_postgres
+  render_compose_footer
+}
+
+# render_compose_header prints the shared "x-default" anchor (restart policy and
+# json-file log rotation) and opens the "services:" mapping. The heredoc is
+# quoted, so the text is emitted verbatim.
+render_compose_header() {
+  cat <<'EOF'
+x-default: &default
+  restart: unless-stopped
+  logging:
+    driver: json-file
+    options:
+      max-size: '500m'
+      max-file: '2'
+
+services:
+EOF
+}
+
+# render_compose_common prints the caddy and dashboard service definitions.
+# The heredoc is quoted, so ${...} placeholders reach docker-compose.yml
+# unexpanded by this script.
+render_compose_common() {
+  cat <<'EOF'
+  caddy:
+    <<: *default
+    image: caddy:2
+    container_name: netbird-caddy
+    networks: [netbird]
+    environment:
+      - CADDY_SECURE_DOMAIN=${NETBIRD_DOMAIN}
+    ports:
+      - '443:443'
+      - '443:443/udp'
+      - '80:80'
+    volumes:
+      - netbird_caddy_data:/data
+      - ./Caddyfile:/etc/caddy/Caddyfile
+
+  dashboard:
+    <<: *default
+    image: ghcr.io/netbirdio/dashboard-cloud:${NETBIRD_DASHBOARD_TAG}
+    container_name: netbird-dashboard
+    networks: [netbird]
+    environment:
+      - NETBIRD_MGMT_API_ENDPOINT=https://${NETBIRD_DOMAIN}
+      - NETBIRD_MGMT_GRPC_API_ENDPOINT=https://${NETBIRD_DOMAIN}
+      - AUTH_AUDIENCE=netbird-dashboard
+      - AUTH_CLIENT_ID=netbird-dashboard
+      - AUTH_CLIENT_SECRET=
+      - AUTH_AUTHORITY=https://${NETBIRD_DOMAIN}/oauth2
+      - USE_AUTH0=false
+      - AUTH_SUPPORTED_SCOPES=${NETBIRD_AUTH_SUPPORTED_SCOPES}
+      - AUTH_REDIRECT_URI=/nb-auth
+      - AUTH_SILENT_REDIRECT_URI=/nb-silent-auth
+      - NETBIRD_TOKEN_SOURCE=accessToken
+      - NGINX_SSL_PORT=443
+      - LETSENCRYPT_DOMAIN=
+      - LETSENCRYPT_EMAIL=
+
+EOF
+}
+
+# render_compose_server prints the netbird-server service, which starts after
+# the dashboard has started and postgres is healthy and reads the mounted
+# config.yaml. The heredoc is quoted, so ${...} placeholders are emitted
+# unexpanded by this script.
+render_compose_server() {
+  cat <<'EOF'
+  netbird-server:
+    <<: *default
+    image: ghcr.io/netbirdio/netbird-server-cloud:${NETBIRD_SERVER_TAG}
+    container_name: netbird-server
+    networks: [netbird]
+    depends_on:
+      dashboard:
+        condition: service_started
+      postgres:
+        condition: service_healthy
+    ports:
+      - '3478:3478/udp'
+    volumes:
+      - netbird_data:/var/lib/netbird
+      - ./config.yaml:/etc/netbird/config.yaml
+    command: ["--config", "/etc/netbird/config.yaml"]
+    environment:
+      - NB_LICENSE_KEY=${NETBIRD_LICENSE_KEY}
+      - NETBIRD_LICENSE_SERVER_BASE_URL=${NETBIRD_LICENSE_SERVER_BASE_URL}
+
+EOF
+}
+
+# render_compose_flow prints the traffic-flow services: nats, enricher and
+# receiver. The heredoc is quoted, so ${...} placeholders are emitted
+# unexpanded by this script.
+render_compose_flow() {
+  cat <<'EOF'
+  nats:
+    <<: *default
+    image: nats:2
+    container_name: netbird-nats
+    networks: [netbird]
+    volumes:
+      - netbird_nats_data:/data
+    command: ["-m", "8222", "--jetstream", "--store_dir", "/data"]
+
+  enricher:
+    <<: *default
+    image: ghcr.io/netbirdio/flow-enricher-cloud:${NETBIRD_ENRICHER_TAG}
+    container_name: netbird-enricher
+    networks: [netbird]
+    depends_on:
+      postgres:
+        condition: service_healthy
+      nats:
+        condition: service_started
+    volumes:
+      - netbird_enricher:/var/lib/netbird
+    environment:
+      - NB_LICENSE_KEY=${NETBIRD_LICENSE_KEY}
+      - NETBIRD_LICENSE_SERVER_BASE_URL=${NETBIRD_LICENSE_SERVER_BASE_URL}
+      - NB_DATADIR=/var/lib/netbird
+      - NB_MANAGEMENT_STORE_ENGINE=postgres
+      - NB_MANAGEMENT_POSTGRES_DSN=${NETBIRD_STORE_ENGINE_POSTGRES_DSN}
+      - NETBIRD_STORE_ENGINE_POSTGRES_DSN=${NETBIRD_STORE_ENGINE_POSTGRES_DSN}
+      - NB_TRAFFIC_EVENT_POSTGRES_DSN=${NETBIRD_STORE_ENGINE_POSTGRES_DSN}
+      - NB_TRAFFIC_EVENT_STORE_ENGINE=postgres
+      - NB_MANAGEMENT_STORE_KEY=${NETBIRD_ENCRYPTION_KEY}
+      - NB_FLOW_ADAPTER_TYPE=nats
+      - NB_FLOW_NATS_ENDPOINTS=nats://nats:4222
+      - NB_FLOW_NATS_STREAM=traffic-events
+      - NB_METRICS_PORT=9091
+      - NB_PERSISTENCE_RETENTION_PERIOD=168h
+
+  receiver:
+    <<: *default
+    image: ghcr.io/netbirdio/flow-receiver-cloud:${NETBIRD_RECEIVER_TAG}
+    container_name: netbird-receiver
+    networks: [netbird]
+    depends_on:
+      nats:
+        condition: service_started
+    environment:
+      - NB_LICENSE_KEY=${NETBIRD_LICENSE_KEY}
+      - NETBIRD_LICENSE_SERVER_BASE_URL=${NETBIRD_LICENSE_SERVER_BASE_URL}
+      - NB_FLOW_LISTEN_PORT=80
+      - NB_FLOW_ADAPTER_TYPE=nats
+      - NB_FLOW_NATS_ENDPOINTS=nats://nats:4222
+      - NB_FLOW_NATS_STREAM=traffic-events
+      - NB_FLOW_AUTH_SECRET=${NETBIRD_RELAY_AUTH_SECRET}
+
+EOF
+}
+
+# render_compose_postgres prints the postgres service with a pg_isready
+# healthcheck. The heredoc is quoted, so ${...} placeholders are emitted
+# unexpanded by this script.
+render_compose_postgres() {
+  cat <<'EOF'
+  postgres:
+    <<: *default
+    image: postgres:17
+    container_name: netbird-postgres
+    networks: [netbird]
+    environment:
+      - POSTGRES_USER=${POSTGRES_USER}
+      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
+      - POSTGRES_DB=${POSTGRES_DB}
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB}"]
+      interval: 10s
+      timeout: 5s
+      retries: 10
+    volumes:
+      - netbird_postgres:/var/lib/postgresql/data
+
+EOF
+}
+
+# render_compose_footer prints the top-level "volumes:" and "networks:"
+# sections. The nats and enricher volumes are declared only when traffic flow
+# is "yes".
+render_compose_footer() {
+  cat <<'EOF'
+volumes:
+  netbird_data:
+EOF
+  if [[ "$NETBIRD_TRAFFIC_FLOW" == "yes" ]]; then
+    cat <<'EOF'
+  netbird_nats_data:
+  netbird_enricher:
+EOF
+  fi
+  cat <<'EOF'
+  netbird_postgres:
+  netbird_caddy_data:
+
+networks:
+  netbird:
+EOF
+}
+
+# render_caddyfile prints the Caddyfile on stdout: global server options, a
+# reusable security-header snippet, an HTTP->HTTPS redirect, and the HTTPS site
+# that routes to netbird-server, to the flow receiver (only when traffic flow
+# is "yes") and finally to the dashboard. The heredocs are quoted, so the
+# {$CADDY_SECURE_DOMAIN} placeholder is emitted verbatim.
+render_caddyfile() {
+  cat <<'EOF'
+{
+  servers :80,:443 {
+    protocols h1 h2c h2 h3
+  }
+}
+
+(security_headers) {
+    header * {
+        Strict-Transport-Security "max-age=3600; includeSubDomains; preload"
+        X-Content-Type-Options "nosniff"
+        X-Frame-Options "SAMEORIGIN"
+        X-XSS-Protection "1; mode=block"
+        -Server
+        Referrer-Policy strict-origin-when-cross-origin
+    }
+}
+
+:80 {
+    redir https://{$CADDY_SECURE_DOMAIN}{uri} permanent
+}
+
+{$CADDY_SECURE_DOMAIN}:443 {
+    import security_headers
+    # Signal (gRPC over h2c)
+    reverse_proxy /signalexchange.SignalExchange/* h2c://netbird-server:80
+    # Management (gRPC over h2c + HTTP)
+    reverse_proxy /management.ManagementService/* h2c://netbird-server:80
+    reverse_proxy /api/* netbird-server:80
+    reverse_proxy /ws-proxy/* netbird-server:80
+    # Embedded IdP (OAuth2 endpoints served by netbird server)
+    reverse_proxy /oauth2/* netbird-server:80
+    # Relay (WebSocket multiplexed on the same port)
+    reverse_proxy /relay* netbird-server:80
+EOF
+
+  if [[ "$NETBIRD_TRAFFIC_FLOW" == "yes" ]]; then
+    cat <<'EOF'
+    # Flow receiver (gRPC over h2c)
+    reverse_proxy /flow.FlowService/* h2c://receiver:80
+EOF
+  fi
+
+  cat <<'EOF'
+    # Dashboard
+    reverse_proxy /* dashboard:80
+}
+EOF
+}
+
+# render_config_yaml prints the server's config.yaml on stdout. The heredocs
+# are unquoted, so the domain, relay secret, Postgres DSN and encryption key
+# are expanded into the file by this script. The trafficFlow section is
+# appended only when traffic flow is "yes".
+render_config_yaml() {
+  cat <<EOF
+# NetBird Enterprise server configuration.
+# Generated by getting-started-enterprise.sh. Mode 600.
+
+server:
+  listenAddress: ":80"
+  exposedAddress: "https://${NETBIRD_DOMAIN}:443"
+
+  metricsPort: 9090
+  healthcheckAddress: ":9000"
+
+  logLevel: "info"
+  logFile: "console"
+
+  # TLS is terminated by Caddy in front; leave this block empty.
+  tls:
+    certFile: ""
+    keyFile: ""
+    letsencrypt:
+      enabled: false
+
+  authSecret: "${NETBIRD_RELAY_AUTH_SECRET}"
+  dataDir: "/var/lib/netbird/"
+
+  disableAnonymousMetrics: false
+  disableGeoliteUpdate: false
+
+  auth:
+    issuer: "https://${NETBIRD_DOMAIN}/oauth2"
+    localAuthDisabled: false
+    signKeyRefreshEnabled: false
+    dashboardRedirectURIs:
+      - "https://${NETBIRD_DOMAIN}/nb-auth"
+      - "https://${NETBIRD_DOMAIN}/nb-silent-auth"
+    cliRedirectURIs:
+      - "http://localhost:53000/"
+
+  store:
+    engine: "postgres"
+    dsn: "${POSTGRES_DSN}"
+    encryptionKey: "${NETBIRD_ENCRYPTION_KEY}"
+
+  activityStore:
+    engine: "postgres"
+    dsn: "${POSTGRES_DSN}"
+EOF
+
+  if [[ "$NETBIRD_TRAFFIC_FLOW" == "yes" ]]; then
+    cat <<EOF
+
+  trafficFlow:
+    enabled: true
+    address: "https://${NETBIRD_DOMAIN}:443"
+    interval: "60s"
+EOF
+  fi
+}
+
+init_environment
--- a/infrastructure_files/migrate-to-enterprise.sh
+++ b/infrastructure_files/migrate-to-enterprise.sh
@@ -0,0 +1,638 @@
+#!/bin/bash
+
+set -e
+set -o pipefail
+
+# NetBird — community combined → Enterprise combined migration
+#
+# Non-destructive migration: produces docker-compose.override.yml (auto-loaded
+# by docker compose) and config.yaml.enterprise alongside the operator's
+# existing files. Original docker-compose.yml and config.yaml are never
+# modified.
+#
+# Steps (all optional, asked interactively):
+#   1. Image swap         — replace community images with enterprise cloud images.
+#   2. Postgres migration — add Postgres, migrate SQLite data via migrate-store.
+#   3. Traffic flow       — add NATS + flow-enricher + flow-receiver.
+#
+# To revert:
+#   docker compose down
+#   rm -f docker-compose.override.yml config.yaml.enterprise
+#   # If Postgres migration was done, also restore the SQLite backup printed
+#   # at the end of this script's run.
+#   docker compose up -d
+
+OVERRIDE_FILE="docker-compose.override.yml"
+ENTERPRISE_CONFIG_FILE="config.yaml.enterprise"
+
+# check_docker_compose prints the Compose invocation usable on this host:
+# "docker-compose" when that command is found, otherwise "docker compose" when
+# the CLI subcommand responds. Exits with status 1 when neither is available.
+# Callers capture stdout, so diagnostics are written to stderr only.
+check_docker_compose() {
+  if command -v docker-compose &> /dev/null; then
+    echo "docker-compose"
+    return
+  fi
+  if docker compose --help &> /dev/null; then
+    echo "docker compose"
+    return
+  fi
+  # Duplicate fd 2 (>&2) instead of reopening /dev/stderr: reopening truncates a
+  # redirected log file and can fail when the device path is not writable.
+  echo "docker-compose is not installed or not in PATH." >&2
+  exit 1
+}
+
+check_yq() {
+  if ! command -v yq &> /dev/null; then
+    cat > /dev/stderr <<'EOF'
+yq is required to parse and update YAML safely.
+
+  macOS:   brew install yq
+  Linux:   https://github.com/mikefarah/yq/releases (download binary into PATH)
+  Debian:  apt-get install yq   (Note: must be the mikefarah Go yq, not the Python wrapper.)
+
+EOF
+    exit 1
+  fi
+  if ! yq --version 2>&1 | grep -q "mikefarah"; then
+    echo "yq is present but appears to be the wrong implementation. The mikefarah Go-based yq is required (https://github.com/mikefarah/yq)." > /dev/stderr
+    exit 1
+  fi
+}
+
+check_openssl() {
+  if ! command -v openssl &> /dev/null; then
+    echo "openssl is not installed or not in PATH." > /dev/stderr
+    exit 1
+  fi
+}
+
+# rand_password prints 32 random bytes as a hex string.
+rand_password() {
+  openssl rand -hex 32
+}
+
+read_required() {
+  local prompt="$1"
+  local value=""
+  while [[ -z "$value" ]]; do
+    echo -n "$prompt: " > /dev/stderr
+    read -r value < /dev/tty
+    if [[ -z "$value" ]]; then
+      echo "Value cannot be empty." > /dev/stderr
+    fi
+  done
+  echo "$value"
+}
+
+read_secret() {
+  local prompt="$1"
+  local value=""
+  while [[ -z "$value" ]]; do
+    echo -n "$prompt: " > /dev/stderr
+    read -rs value < /dev/tty
+    echo "" > /dev/stderr
+    if [[ -z "$value" ]]; then
+      echo "Value cannot be empty." > /dev/stderr
+    fi
+  done
+  echo "$value"
+}
+
+read_yes_no() {
+  local prompt="$1"
+  local default="${2:-n}"
+  local hint
+  if [[ "$default" == "y" ]]; then
+    hint="[Y/n]"
+  else
+    hint="[y/N]"
+  fi
+  echo -n "${prompt} ${hint}: " > /dev/stderr
+  local ans=""
+  read -r ans < /dev/tty
+  if [[ -z "$ans" ]]; then
+    ans="$default"
+  fi
+  case "$ans" in
+    [Yy] | [Yy][Ee][Ss]) echo "yes" ;;
+    *) echo "no" ;;
+  esac
+}
+
+# ---------------------------------------------------------------------------
+# Detection — read the operator's existing compose to find service names and
+# paths we need to override. Bail loudly if shape isn't recognised.
+# ---------------------------------------------------------------------------
+
+# detect_combined_service prints the name of the first service in COMPOSE_FILE
+# whose image starts with "netbirdio/netbird-server", or an empty string when
+# there is none.
+detect_combined_service() {
+  # Services without an image key (e.g. build-only) yield null; default it to ""
+  # so the string-only test() operator does not abort the whole query.
+  yq eval '.services | to_entries | map(select((.value.image // "") | test("^netbirdio/netbird-server"))) | .[0].key // ""' "$COMPOSE_FILE"
+}
+
+# detect_dashboard_service prints the name of the first service in COMPOSE_FILE
+# whose image starts with "netbirdio/dashboard", or an empty string when there
+# is none.
+detect_dashboard_service() {
+  # Services without an image key (e.g. build-only) yield null; default it to ""
+  # so the string-only test() operator does not abort the whole query.
+  yq eval '.services | to_entries | map(select((.value.image // "") | test("^netbirdio/dashboard"))) | .[0].key // ""' "$COMPOSE_FILE"
+}
+
+# detect_config_yaml_host_path prints the host-side part of the first volume
+# entry of COMBINED_SERVICE that mounts onto /etc/netbird/config.yaml (the text
+# before ":/etc/netbird/config.yaml").
+# NOTE(review): the query treats each volume entry as a string; long-form
+# (mapping) volume entries are presumably unsupported - confirm.
+detect_config_yaml_host_path() {
+  yq eval ".services[\"$COMBINED_SERVICE\"].volumes[] | select(. | test(\":/etc/netbird/config.yaml\")) | sub(\":/etc/netbird/config.yaml.*\"; \"\") // \"\"" "$COMPOSE_FILE" | head -1
+}
+
+# detect_data_volume prints the source part of the first volume entry of
+# COMBINED_SERVICE that mounts onto /var/lib/netbird (the text before
+# ":/var/lib/netbird").
+detect_data_volume() {
+  yq eval ".services[\"$COMBINED_SERVICE\"].volumes[] | select(. | test(\":/var/lib/netbird\")) | sub(\":/var/lib/netbird.*\"; \"\") // \"\"" "$COMPOSE_FILE" | head -1
+}
+
+# detect_exposed_address prints .server.exposedAddress from the file named by
+# CONFIG_YAML_HOST, or an empty string when the key is missing.
+detect_exposed_address() {
+  yq eval '.server.exposedAddress // ""' "$CONFIG_YAML_HOST"
+}
+
+detect_compose_network() {
+  local tag
+  tag=$(yq eval ".services[\"$COMBINED_SERVICE\"].networks | tag" "$COMPOSE_FILE" 2>/dev/null)
+  case "$tag" in
+    "!!seq")
+      yq eval ".services[\"$COMBINED_SERVICE\"].networks[0]" "$COMPOSE_FILE"
+      ;;
+    "!!map")
+      yq eval ".services[\"$COMBINED_SERVICE\"].networks | keys | .[0]" "$COMPOSE_FILE"
+      ;;
+    *)
+      echo "default"
+      ;;
+  esac
+}
+
+# ---------------------------------------------------------------------------
+# Renderers
+# ---------------------------------------------------------------------------
+
+# Build docker-compose.override.yml from the steps the operator selected.
+# Service names match what we detected on the operator's side.
+#
+# Output goes to stdout. The heredocs are unquoted: ${DASHBOARD_SERVICE},
+# ${COMBINED_SERVICE}, ${COMPOSE_NETWORK}, ${ENTERPRISE_CONFIG_FILE} and
+# ${NETBIRD_HOSTNAME} are expanded by this script, while backslash-escaped
+# \${...} placeholders are emitted literally as ${...} in the output file.
+# Sections emitted:
+#   - always: image overrides and license environment for the two services;
+#   - MIGRATE_POSTGRES == "yes": enterprise config mount plus a postgres service;
+#   - ENABLE_FLOW == "yes": nats, flow-enricher and flow-receiver services;
+#   - a "volumes:" section when either optional step is selected.
+# NOTE(review): flow-enricher depends_on postgres, but the postgres service is
+# only emitted when MIGRATE_POSTGRES is "yes" - presumably the caller forces
+# Postgres whenever flow is enabled; confirm.
+render_override() {
+  cat <<EOF
+# Generated by migrate-to-enterprise.sh. Mode 644.
+# Merged with docker-compose.yml automatically by Docker Compose.
+# Remove this file (and config.yaml.enterprise if present) to revert.
+
+services:
+  ${DASHBOARD_SERVICE}:
+    image: \${NETBIRD_DASHBOARD_IMAGE:-ghcr.io/netbirdio/dashboard-cloud:latest}
+
+  ${COMBINED_SERVICE}:
+    image: \${NETBIRD_SERVER_IMAGE:-ghcr.io/netbirdio/netbird-server-cloud:latest}
+    environment:
+      NB_LICENSE_KEY: \${NB_LICENSE_KEY}
+      NETBIRD_LICENSE_SERVER_BASE_URL: \${NETBIRD_LICENSE_SERVER_BASE_URL}
+EOF
+
+  # The keys below continue the ${COMBINED_SERVICE} mapping opened above.
+  if [[ "$MIGRATE_POSTGRES" == "yes" ]]; then
+    cat <<EOF
+    depends_on:
+      postgres:
+        condition: service_healthy
+    volumes:
+      - ./${ENTERPRISE_CONFIG_FILE}:/etc/netbird/config.yaml.enterprise:ro
+    command: ["--config", "/etc/netbird/config.yaml.enterprise"]
+
+  postgres:
+    image: postgres:17
+    container_name: netbird-postgres
+    restart: unless-stopped
+    networks: [${COMPOSE_NETWORK}]
+    environment:
+      POSTGRES_USER: netbird
+      POSTGRES_PASSWORD: \${POSTGRES_PASSWORD}
+      POSTGRES_DB: netbird
+    volumes:
+      - netbird_postgres:/var/lib/postgresql/data
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U netbird -d netbird"]
+      interval: 5s
+      timeout: 5s
+      retries: 20
+EOF
+  fi
+
+  if [[ "$ENABLE_FLOW" == "yes" ]]; then
+    cat <<EOF
+
+  nats:
+    image: nats:2
+    container_name: netbird-nats
+    restart: unless-stopped
+    networks: [${COMPOSE_NETWORK}]
+    command: ["-m", "8222", "--jetstream", "--store_dir", "/data"]
+    volumes:
+      - netbird_nats_data:/data
+
+  flow-enricher:
+    image: ghcr.io/netbirdio/flow-enricher-cloud:latest
+    container_name: netbird-flow-enricher
+    restart: unless-stopped
+    networks: [${COMPOSE_NETWORK}]
+    depends_on:
+      postgres:
+        condition: service_healthy
+      nats:
+        condition: service_started
+    environment:
+      NB_LICENSE_KEY: \${NB_LICENSE_KEY}
+      NETBIRD_LICENSE_SERVER_BASE_URL: \${NETBIRD_LICENSE_SERVER_BASE_URL}
+      NB_DATADIR: /var/lib/netbird
+      NB_MANAGEMENT_STORE_ENGINE: postgres
+      NB_MANAGEMENT_POSTGRES_DSN: "host=postgres user=netbird password=\${POSTGRES_PASSWORD} dbname=netbird port=5432 sslmode=disable"
+      NB_STORE_ENGINE_POSTGRES_DSN: "host=postgres user=netbird password=\${POSTGRES_PASSWORD} dbname=netbird port=5432 sslmode=disable"
+      NB_TRAFFIC_EVENT_STORE_ENGINE: postgres
+      NB_TRAFFIC_EVENT_POSTGRES_DSN: "host=postgres user=netbird password=\${POSTGRES_PASSWORD} dbname=netbird port=5432 sslmode=disable"
+      NB_MANAGEMENT_STORE_KEY: \${NETBIRD_ENCRYPTION_KEY}
+      NB_FLOW_ADAPTER_TYPE: nats
+      NB_FLOW_NATS_ENDPOINTS: nats://nats:4222
+      NB_FLOW_NATS_STREAM: traffic-events
+      NB_METRICS_PORT: 9091
+      NB_PERSISTENCE_RETENTION_PERIOD: 168h
+
+  flow-receiver:
+    image: ghcr.io/netbirdio/flow-receiver-cloud:latest
+    container_name: netbird-flow-receiver
+    restart: unless-stopped
+    networks: [${COMPOSE_NETWORK}]
+    depends_on:
+      nats:
+        condition: service_started
+    environment:
+      NB_LICENSE_KEY: \${NB_LICENSE_KEY}
+      NETBIRD_LICENSE_SERVER_BASE_URL: \${NETBIRD_LICENSE_SERVER_BASE_URL}
+      NB_FLOW_LISTEN_PORT: 80
+      NB_FLOW_ADAPTER_TYPE: nats
+      NB_FLOW_NATS_ENDPOINTS: nats://nats:4222
+      NB_FLOW_NATS_STREAM: traffic-events
+      NB_FLOW_AUTH_SECRET: \${NB_FLOW_AUTH_SECRET}
+    labels:
+      - traefik.enable=true
+      - traefik.http.routers.netbird-flow.rule=Host(\`${NETBIRD_HOSTNAME}\`) && PathPrefix(\`/flow.FlowService/\`)
+      - traefik.http.routers.netbird-flow.entrypoints=websecure
+      - traefik.http.routers.netbird-flow.tls=true
+      - traefik.http.routers.netbird-flow.tls.certresolver=letsencrypt
+      - traefik.http.routers.netbird-flow.service=netbird-flow-h2c
+      - traefik.http.routers.netbird-flow.priority=100
+      - traefik.http.services.netbird-flow-h2c.loadbalancer.server.port=80
+      - traefik.http.services.netbird-flow-h2c.loadbalancer.server.scheme=h2c
+EOF
+  fi
+
+  # Volume declarations for anything new the override introduced
+  local has_volumes="no"
+  if [[ "$MIGRATE_POSTGRES" == "yes" ]] || [[ "$ENABLE_FLOW" == "yes" ]]; then
+    has_volumes="yes"
+  fi
+
+  if [[ "$has_volumes" == "yes" ]]; then
+    cat <<EOF
+
+volumes:
+EOF
+    if [[ "$MIGRATE_POSTGRES" == "yes" ]]; then
+      echo "  netbird_postgres:"
+    fi
+    if [[ "$ENABLE_FLOW" == "yes" ]]; then
+      echo "  netbird_nats_data:"
+    fi
+  fi
+}
+
+# Build config.yaml.enterprise by yq-editing the operator's existing
+# config.yaml. We don't touch the original file.
+render_enterprise_config() {
+  local pg_dsn="host=postgres user=netbird password=${POSTGRES_PASSWORD} dbname=netbird port=5432 sslmode=disable"
+
+  yq eval "
+    .server.store.engine = \"postgres\" |
+    .server.store.dsn = \"$pg_dsn\" |
+    .server.activityStore.engine = \"postgres\" |
+    .server.activityStore.dsn = \"$pg_dsn\" |
+    .server.authStore.engine = \"postgres\" |
+    .server.authStore.dsn = \"$pg_dsn\"
+  " "$CONFIG_YAML_HOST" > "$ENTERPRISE_CONFIG_FILE"
+
+  if [[ "$ENABLE_FLOW" == "yes" ]]; then
+    local flow_addr="${NETBIRD_DOMAIN}"
+    yq eval -i "
+      .server.trafficFlow.enabled = true |
+      .server.trafficFlow.address = \"$flow_addr\" |
+      .server.trafficFlow.interval = \"60s\"
+    " "$ENTERPRISE_CONFIG_FILE"
+  fi
+}
+
+# ---------------------------------------------------------------------------
+# Execution steps
+# ---------------------------------------------------------------------------
+
+resolve_data_volume() {
+  local short="$1"
+  local actual
+  # Resolve project-prefixed volume name from Docker Compose config first.
+  actual=$($DOCKER_COMPOSE_COMMAND config 2>/dev/null | yq eval ".volumes.\"$short\".name" - 2>/dev/null)
+  if [[ -n "$actual" && "$actual" != "null" ]]; then
+    echo "$actual"
+    return
+  fi
+  # Relative bind mount: docker-compose resolves it against the compose
+  # file's directory, but `docker run -v` resolves it against the current
+  # working directory. Normalize to an absolute path so both interpretations
+  # agree (and the printed revert command works from any CWD).
+  if [[ "$short" == ./* || "$short" == ../* ]]; then
+    local compose_dir
+    compose_dir="$(cd "$(dirname "$COMPOSE_FILE")" && pwd)"
+    (
+      cd "$compose_dir"
+      cd "$(dirname "$short")"
+      printf '%s/%s\n' "$(pwd)" "$(basename "$short")"
+    )
+    return
+  fi
+  # Not a named volume (e.g. an absolute bind-mount path) — use it as-is.
+  echo "$short"
+}
+
+# Copy the contents of the combined server's data volume into a timestamped
+# directory under ./backups before the store is migrated.
+#
+# Reads:  DATA_VOLUME.
+# Sets:   BACKUP_DIR (global; also used by the revert instructions).
+# Exits:  status 1 when nothing at all was copied.
+backup_sqlite() {
+  BACKUP_DIR="$(pwd)/backups/sqlite-pre-enterprise-$(date +%Y%m%d-%H%M%S)"
+  mkdir -p "$BACKUP_DIR"
+  local data_volume_actual
+  data_volume_actual=$(resolve_data_volume "$DATA_VOLUME")
+  echo "Backing up SQLite store from volume '$data_volume_actual' to $BACKUP_DIR ..."
+  # The copy is best-effort: cp's errors are hidden and its exit status is
+  # ignored, so the emptiness check below is the only validation performed.
+  docker run --rm \
+    -v "${data_volume_actual}:/var/lib/netbird:ro" \
+    -v "${BACKUP_DIR}:/backup" \
+    busybox \
+    sh -c 'cp -a /var/lib/netbird/. /backup/ 2>/dev/null || true'
+  local copied
+  copied=$(find "$BACKUP_DIR" -mindepth 1 | head -1)
+  if [[ -z "$copied" ]]; then
+    # Write via >&2 (dup of fd 2) rather than re-opening /dev/stderr, which
+    # would truncate a redirected log file and can fail when stderr is a
+    # socket or a terminal owned by another user.
+    echo "  ⚠ Backup directory is empty — the volume '$data_volume_actual' didn't contain data. Aborting." >&2
+    exit 1
+  fi
+  echo "  done"
+}
+
+# Run the combined service's `migrate-store` command as a one-off compose
+# container, pointing it at the enterprise config and asking it to verify.
+#
+# Reads: DOCKER_COMPOSE_COMMAND, COMBINED_SERVICE.
+run_migrate_store() {
+  local enterprise_cfg_in_container="/etc/netbird/config.yaml.enterprise"
+
+  echo "Running migrate-store (SQLite → Postgres) ..."
+  $DOCKER_COMPOSE_COMMAND run --rm "$COMBINED_SERVICE" \
+    migrate-store \
+    --config "$enterprise_cfg_in_container" \
+    --verify
+  echo "  done"
+}
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
+init_migration() {
+  DOCKER_COMPOSE_COMMAND=$(check_docker_compose)
+  check_yq
+  check_openssl
+
+  COMPOSE_FILE="${COMPOSE_FILE:-docker-compose.yml}"
+
+  if [[ ! -f "$COMPOSE_FILE" ]]; then
+    echo "$COMPOSE_FILE not found in $(pwd)." > /dev/stderr
+    exit 1
+  fi
+  if [[ -f "$OVERRIDE_FILE" ]] || [[ -f "$ENTERPRISE_CONFIG_FILE" ]]; then
+    echo "Migration artifacts already exist in $(pwd):"
+    [[ -f "$OVERRIDE_FILE" ]] && echo "  $OVERRIDE_FILE"
+    [[ -f "$ENTERPRISE_CONFIG_FILE" ]] && echo "  $ENTERPRISE_CONFIG_FILE"
+    echo ""
+    echo "Either you've already migrated, or a previous run was interrupted."
+    echo "To re-run cleanly: rm -f $OVERRIDE_FILE $ENTERPRISE_CONFIG_FILE"
+    exit 1
+  fi
+
+  COMBINED_SERVICE=$(detect_combined_service)
+  DASHBOARD_SERVICE=$(detect_dashboard_service)
+  CONFIG_YAML_HOST=$(detect_config_yaml_host_path)
+  DATA_VOLUME=$(detect_data_volume)
+  COMPOSE_NETWORK=$(detect_compose_network)
+
+  if [[ -z "$COMBINED_SERVICE" ]]; then
+    echo "Could not find a service running netbirdio/netbird-server* in $COMPOSE_FILE." > /dev/stderr
+    echo "This script targets the community combined-server deployment." > /dev/stderr
+    exit 1
+  fi
+  if [[ -z "$DASHBOARD_SERVICE" ]]; then
+    echo "Could not find a service running netbirdio/dashboard* in $COMPOSE_FILE." > /dev/stderr
+    exit 1
+  fi
+  if [[ -z "$CONFIG_YAML_HOST" ]]; then
+    echo "Could not find a config.yaml mount on $COMBINED_SERVICE (expected to bind-mount to /etc/netbird/config.yaml)." > /dev/stderr
+    exit 1
+  fi
+  if [[ ! -f "$CONFIG_YAML_HOST" ]]; then
+    echo "config.yaml host file not found at $CONFIG_YAML_HOST." > /dev/stderr
+    exit 1
+  fi
+  if [[ -z "$DATA_VOLUME" ]]; then
+    echo "Could not find a volume mounted at /var/lib/netbird on $COMBINED_SERVICE." > /dev/stderr
+    exit 1
+  fi
+
+  echo "Detected existing deployment:"
+  echo "  Combined service: $COMBINED_SERVICE"
+  echo "  Dashboard:        $DASHBOARD_SERVICE"
+  echo "  config.yaml:      $CONFIG_YAML_HOST"
+  echo "  Data volume:      $DATA_VOLUME"
+  echo "  Network:          $COMPOSE_NETWORK"
+  echo ""
+
+  local proceed
+  proceed=$(read_yes_no "Proceed with migration?" "y")
+  if [[ "$proceed" != "yes" ]]; then
+    echo "Aborted."
+    exit 0
+  fi
+
+  # Step 1 — always (this is the point of the script)
+  MIGRATE_IMAGES="yes"
+  echo ""
+  echo "Step 1: Image swap (community → Enterprise). License key required."
+  NB_LICENSE_KEY=$(read_secret "  License key")
+  GHCR_USERNAME="netbirdExtAccess1"
+  GHCR_TOKEN=$(read_secret "  GHCR token (input hidden)")
+
+  # Step 2 — optional
+  echo ""
+  MIGRATE_POSTGRES=$(read_yes_no "Step 2: Migrate storage from SQLite to Postgres? (recommended)" "n")
+  if [[ "$MIGRATE_POSTGRES" == "yes" ]]; then
+    echo ""
+    echo "  ⚠  Data will be migrated from SQLite to Postgres. The SQLite store"
+    echo "     will be backed up automatically. To fully revert later, restore"
+    echo "     that backup and delete docker-compose.override.yml +"
+    echo "     config.yaml.enterprise."
+    local confirm
+    confirm=$(read_yes_no "  Continue?" "y")
+    if [[ "$confirm" != "yes" ]]; then
+      MIGRATE_POSTGRES="no"
+      echo "  Skipping Postgres migration."
+    else
+      POSTGRES_PASSWORD=$(rand_password)
+    fi
+  fi
+
+  # Step 3 — optional, only if Postgres is on (flow requires Postgres)
+  echo ""
+  if [[ "$MIGRATE_POSTGRES" == "yes" ]]; then
+    ENABLE_FLOW=$(read_yes_no "Step 3: Enable traffic flow? (requires Postgres)" "n")
+    if [[ "$ENABLE_FLOW" == "yes" ]]; then
+      # Auth secret MUST match server.authSecret from config.yaml
+      NB_FLOW_AUTH_SECRET=$(yq eval '.server.authSecret // ""' "$CONFIG_YAML_HOST")
+      if [[ -z "$NB_FLOW_AUTH_SECRET" ]] || [[ "$NB_FLOW_AUTH_SECRET" == "null" ]]; then
+        echo "Could not read server.authSecret from $CONFIG_YAML_HOST." > /dev/stderr
+        echo "Flow receiver auth must match the combined server's authSecret." > /dev/stderr
+        exit 1
+      fi
+
+      NETBIRD_DOMAIN=$(detect_exposed_address)
+      if [[ -z "$NETBIRD_DOMAIN" ]] || [[ "$NETBIRD_DOMAIN" == "null" ]]; then
+        NETBIRD_DOMAIN=$(read_required "  Public NetBird URL (e.g. https://netbird.example.com)")
+      fi
+      # Strip protocol + port to leave just the hostname for the Traefik Host() rule.
+      NETBIRD_HOSTNAME=$(echo "$NETBIRD_DOMAIN" | sed -E 's,^https?://,,' | sed 's,:.*,,' | sed 's,/.*,,')
+
+      # We need the encryption key from the existing config.yaml for the enricher
+      NETBIRD_ENCRYPTION_KEY=$(yq eval '.server.store.encryptionKey // ""' "$CONFIG_YAML_HOST")
+      if [[ -z "$NETBIRD_ENCRYPTION_KEY" ]] || [[ "$NETBIRD_ENCRYPTION_KEY" == "null" ]]; then
+        echo "Could not read server.store.encryptionKey from $CONFIG_YAML_HOST." > /dev/stderr
+        exit 1
+      fi
+    fi
+  else
+    ENABLE_FLOW="no"
+    echo "Step 3 (traffic flow) skipped — requires Postgres."
+  fi
+}
+
+# Carry out the migration decided on in init_migration, in this order:
+#   1. write the compose override file (and the enterprise config when
+#      Postgres is selected),
+#   2. append the referenced secrets to ./.env,
+#   3. log in to ghcr.io and pull images,
+#   4. when Postgres is selected: stop the stack, back up the SQLite data,
+#      start Postgres, wait for it to accept connections, run migrate-store,
+#   5. bring every service up.
+#
+# Reads:  the globals set by init_migration, plus OVERRIDE_FILE,
+#         ENTERPRISE_CONFIG_FILE and optional NETBIRD_LICENSE_SERVER_BASE_URL.
+# Exits:  status 1 when Postgres does not become ready within the wait loop.
+apply_changes() {
+  echo ""
+  echo "Writing $OVERRIDE_FILE ..."
+  # Create the file empty with its final mode first; the redirect below then
+  # fills the already-existing file.
+  install -m 644 /dev/null "$OVERRIDE_FILE"
+  render_override > "$OVERRIDE_FILE"
+
+  # No license-server URL configured: drop every override line that mentions
+  # the variable. The .bak suffix form is presumably used for GNU/BSD sed
+  # portability — the backup file is removed right away.
+  if [[ -z "${NETBIRD_LICENSE_SERVER_BASE_URL:-}" ]]; then
+    sed -i.bak '/NETBIRD_LICENSE_SERVER_BASE_URL/d' "$OVERRIDE_FILE" && rm -f "$OVERRIDE_FILE.bak"
+  fi
+
+  if [[ "$MIGRATE_POSTGRES" == "yes" ]]; then
+    echo "Writing $ENTERPRISE_CONFIG_FILE ..."
+    # Mode 600: the rendered config embeds the Postgres password in its DSNs.
+    install -m 600 /dev/null "$ENTERPRISE_CONFIG_FILE"
+    render_enterprise_config
+  fi
+
+  # Persist secrets that the override file references via env interpolation.
+  # We write them to a .env file in the current directory; docker compose
+  # picks it up automatically.
+  echo "Writing .env additions (mode 600) ..."
+  local ENV_FILE=".env"
+  touch "$ENV_FILE"
+  chmod 600 "$ENV_FILE"
+  # Appended (>>), so any existing .env content is kept; the dated comment
+  # line marks where this script's additions begin.
+  {
+    echo ""
+    echo "# Added by migrate-to-enterprise.sh on $(date -u +%Y-%m-%dT%H:%M:%SZ)"
+    echo "NB_LICENSE_KEY=${NB_LICENSE_KEY}"
+    if [[ -n "${NETBIRD_LICENSE_SERVER_BASE_URL:-}" ]]; then
+      echo "NETBIRD_LICENSE_SERVER_BASE_URL=${NETBIRD_LICENSE_SERVER_BASE_URL}"
+    fi
+    if [[ "$MIGRATE_POSTGRES" == "yes" ]]; then
+      echo "POSTGRES_PASSWORD=${POSTGRES_PASSWORD}"
+    fi
+    if [[ "$ENABLE_FLOW" == "yes" ]]; then
+      echo "NB_FLOW_AUTH_SECRET=${NB_FLOW_AUTH_SECRET}"
+      echo "NETBIRD_ENCRYPTION_KEY=${NETBIRD_ENCRYPTION_KEY}"
+    fi
+  } >> "$ENV_FILE"
+
+  echo ""
+  echo "Logging in to ghcr.io ..."
+  # The token is fed on stdin so it never appears in the process arguments,
+  # and is dropped from the shell as soon as the login has been attempted.
+  printf '%s' "$GHCR_TOKEN" | docker login ghcr.io -u "$GHCR_USERNAME" --password-stdin
+  unset GHCR_TOKEN
+
+  echo ""
+  echo "Pulling enterprise images ..."
+  $DOCKER_COMPOSE_COMMAND pull
+
+  if [[ "$MIGRATE_POSTGRES" == "yes" ]]; then
+    echo ""
+    echo "Stopping existing services (volumes preserved) ..."
+    $DOCKER_COMPOSE_COMMAND down
+
+    backup_sqlite
+
+    echo ""
+    echo "Starting Postgres ..."
+    $DOCKER_COMPOSE_COMMAND up -d postgres
+
+    # Wait for healthy
+    # Polls pg_isready inside the container: up to 60 attempts, 2s apart.
+    local counter=0
+    echo -n "Waiting for Postgres to become ready"
+    while ! $DOCKER_COMPOSE_COMMAND exec -T postgres pg_isready -U netbird -d netbird &> /dev/null; do
+      echo -n " ."
+      sleep 2
+      counter=$((counter + 1))
+      if [[ $counter -ge 60 ]]; then
+        echo ""
+        echo "Postgres did not become ready in 120s. Recent logs:"
+        $DOCKER_COMPOSE_COMMAND logs --tail=20 postgres
+        exit 1
+      fi
+    done
+    echo " done"
+
+    run_migrate_store
+  fi
+
+  echo ""
+  echo "Bringing up all services ..."
+  $DOCKER_COMPOSE_COMMAND up -d
+
+  echo ""
+  echo "Migration complete."
+}
+
+# Print a recap of what the migration changed, the files it generated, how to
+# tail the logs, and the exact commands needed to revert.
+#
+# Reads: MIGRATE_POSTGRES, ENABLE_FLOW, OVERRIDE_FILE, ENTERPRISE_CONFIG_FILE,
+#        DOCKER_COMPOSE_COMMAND, COMBINED_SERVICE, DATA_VOLUME, BACKUP_DIR.
+print_summary() {
+  local rule="──────────────────────────────────────────────────────────────────────"
+  local storage_line flow_line
+
+  if [[ "$MIGRATE_POSTGRES" == "yes" ]]; then
+    storage_line="  Storage:          Postgres (data migrated from SQLite)"
+  else
+    storage_line="  Storage:          SQLite (unchanged)"
+  fi
+
+  if [[ "$ENABLE_FLOW" == "yes" ]]; then
+    flow_line="  Traffic flow:     enabled"
+  else
+    flow_line="  Traffic flow:     disabled"
+  fi
+
+  echo ""
+  echo "$rule"
+  echo " Summary"
+  echo "$rule"
+  echo "  Images:           swapped to enterprise"
+  echo "$storage_line"
+  echo "$flow_line"
+  echo ""
+  echo "  Generated files (next to your docker-compose.yml):"
+  echo "    $OVERRIDE_FILE"
+  if [[ "$MIGRATE_POSTGRES" == "yes" ]]; then
+    echo "    $ENTERPRISE_CONFIG_FILE"
+  fi
+  echo "    .env  (license key + secrets, mode 600)"
+  if [[ "$MIGRATE_POSTGRES" == "yes" ]]; then
+    echo "    backups/sqlite-pre-enterprise-*/  (SQLite backup)"
+  fi
+  echo ""
+  echo " Tail logs:"
+  echo "   $DOCKER_COMPOSE_COMMAND logs -f $COMBINED_SERVICE"
+  echo ""
+  echo "$rule"
+  echo " To revert"
+  echo "$rule"
+  echo "  $DOCKER_COMPOSE_COMMAND down"
+  if [[ "$MIGRATE_POSTGRES" == "yes" ]]; then
+    # Volume names are looked up while the override file is still in place,
+    # so the project-prefixed Postgres volume can be resolved.
+    local pg_volume data_volume_actual
+    pg_volume=$(resolve_data_volume "netbird_postgres")
+    data_volume_actual=$(resolve_data_volume "$DATA_VOLUME")
+    echo "  # Remove the Postgres volume FIRST, before deleting the override file:"
+    echo "  docker volume rm $pg_volume"
+    echo "  # Restore SQLite from the backup created during this run:"
+    echo "  docker run --rm -v ${data_volume_actual}:/var/lib/netbird -v ${BACKUP_DIR}:/backup busybox sh -c 'cp -a /backup/. /var/lib/netbird/'"
+  fi
+  echo "  rm -f $OVERRIDE_FILE $ENTERPRISE_CONFIG_FILE"
+  echo "  # Remove migrate-to-enterprise.sh additions from .env (search for the timestamp marker)"
+  echo "  $DOCKER_COMPOSE_COMMAND up -d"
+  echo "$rule"
+}
+
+# ---------------------------------------------------------------------------
+# Run
+# ---------------------------------------------------------------------------
+
+init_migration
+apply_changes
+print_summary
--- a/management/internals/shared/grpc/server.go
+++ b/management/internals/shared/grpc/server.go
@@ -1205,7 +1205,7 @@ func (s *Server) SyncMeta(ctx context.Context, req *proto.EncryptedMessage) (*pr
 		return nil, msg
 	}

-	err = s.accountManager.SyncPeerMeta(ctx, peerKey.String(), extractPeerMeta(ctx, syncMetaReq.GetMeta()))
+	err = s.accountManager.SyncPeerMeta(ctx, peerKey.String(), extractPeerMeta(ctx, syncMetaReq.GetMeta()), realIP)
 	if err != nil {
 		return nil, mapError(ctx, err)
 	}
@@ -1254,7 +1254,10 @@ func (s *Server) Logout(ctx context.Context, req *proto.EncryptedMessage) (*prot
 func toProtocolChecks(ctx context.Context, postureChecks []*posture.Checks) []*proto.Checks {
 	protoChecks := make([]*proto.Checks, 0, len(postureChecks))
 	for _, postureCheck := range postureChecks {
-		protoChecks = append(protoChecks, toProtocolCheck(postureCheck))
+		check := toProtocolCheck(postureCheck)
+		if check != nil {
+			protoChecks = append(protoChecks, check)
+		}
 	}

 	return protoChecks
@@ -1278,5 +1281,9 @@ func toProtocolCheck(postureCheck *posture.Checks) *proto.Checks {
 		}
 	}

+	if len(protoCheck.Files) == 0 {
+		return nil
+	}
+
 	return protoCheck
 }
--- a/management/server/account.go
+++ b/management/server/account.go
@@ -1889,12 +1889,12 @@ func domainIsUpToDate(domain string, domainCategory string, userAuth auth.UserAu
 // concurrent stream that started earlier loses the optimistic-lock race
 // in MarkPeerConnected and bails without writing.
 func (am *DefaultAccountManager) SyncAndMarkPeer(ctx context.Context, accountID string, peerPubKey string, meta nbpeer.PeerSystemMeta, realIP net.IP, syncTime time.Time) (*nbpeer.Peer, *types.NetworkMap, []*posture.Checks, int64, error) {
-	peer, netMap, postureChecks, dnsfwdPort, err := am.SyncPeer(ctx, types.PeerSync{WireGuardPubKey: peerPubKey, Meta: meta}, accountID)
+	peer, netMap, postureChecks, dnsfwdPort, err := am.SyncPeer(ctx, types.PeerSync{WireGuardPubKey: peerPubKey, Meta: meta, RealIP: realIP}, accountID)
 	if err != nil {
 		return nil, nil, nil, 0, fmt.Errorf("error syncing peer: %w", err)
 	}

-	if err := am.MarkPeerConnected(ctx, peerPubKey, realIP, accountID, syncTime.UnixNano(), netMap); err != nil {
+	if err := am.MarkPeerConnected(ctx, peerPubKey, accountID, syncTime.UnixNano(), netMap); err != nil {
 		log.WithContext(ctx).Warnf("failed marking peer as connected %s %v", peerPubKey, err)
 	}

@@ -1914,13 +1914,13 @@ func (am *DefaultAccountManager) OnPeerDisconnected(ctx context.Context, account
 	return nil
 }

-func (am *DefaultAccountManager) SyncPeerMeta(ctx context.Context, peerPubKey string, meta nbpeer.PeerSystemMeta) error {
+func (am *DefaultAccountManager) SyncPeerMeta(ctx context.Context, peerPubKey string, meta nbpeer.PeerSystemMeta, realIP net.IP) error {
 	accountID, err := am.Store.GetAccountIDByPeerPubKey(ctx, peerPubKey)
 	if err != nil {
 		return err
 	}

-	_, _, _, _, err = am.SyncPeer(ctx, types.PeerSync{WireGuardPubKey: peerPubKey, Meta: meta, UpdateAccountPeers: true}, accountID)
+	_, _, _, _, err = am.SyncPeer(ctx, types.PeerSync{WireGuardPubKey: peerPubKey, Meta: meta, RealIP: realIP, UpdateAccountPeers: true}, accountID)
 	if err != nil {
 		return err
 	}
--- a/management/server/account/manager.go
+++ b/management/server/account/manager.go
@@ -62,7 +62,7 @@ type Manager interface {
 	GetUserFromUserAuth(ctx context.Context, userAuth auth.UserAuth) (*types.User, error)
 	ListUsers(ctx context.Context, accountID string) ([]*types.User, error)
 	GetPeers(ctx context.Context, accountID, userID, nameFilter, ipFilter string) ([]*nbpeer.Peer, error)
-	MarkPeerConnected(ctx context.Context, peerKey string, realIP net.IP, accountID string, sessionStartedAt int64, nmap *types.NetworkMap) error
+	MarkPeerConnected(ctx context.Context, peerKey string, accountID string, sessionStartedAt int64, nmap *types.NetworkMap) error
 	MarkPeerDisconnected(ctx context.Context, peerKey string, accountID string, sessionStartedAt int64) error
 	DeletePeer(ctx context.Context, accountID, peerID, userID string) error
 	UpdatePeer(ctx context.Context, accountID, userID string, p *nbpeer.Peer) (*nbpeer.Peer, error)
@@ -123,7 +123,7 @@ type Manager interface {
 	GetValidatedPeers(ctx context.Context, accountID string) (map[string]struct{}, map[string]string, error)
 	SyncAndMarkPeer(ctx context.Context, accountID string, peerPubKey string, meta nbpeer.PeerSystemMeta, realIP net.IP, syncTime time.Time) (*nbpeer.Peer, *types.NetworkMap, []*posture.Checks, int64, error)
 	OnPeerDisconnected(ctx context.Context, accountID string, peerPubKey string, streamStartTime time.Time) error
-	SyncPeerMeta(ctx context.Context, peerPubKey string, meta nbpeer.PeerSystemMeta) error
+	SyncPeerMeta(ctx context.Context, peerPubKey string, meta nbpeer.PeerSystemMeta, realIP net.IP) error
 	FindExistingPostureCheck(accountID string, checks *posture.ChecksDefinition) (*posture.Checks, error)
 	GetAccountIDForPeerKey(ctx context.Context, peerKey string) (string, error)
 	GetAccountSettings(ctx context.Context, accountID string, userID string) (*types.Settings, error)
--- a/management/server/account/manager_mock.go
+++ b/management/server/account/manager_mock.go
@@ -1323,17 +1323,17 @@ func (mr *MockManagerMockRecorder) ExtendPeerSession(ctx, peerPubKey, userID int
 }

 // MarkPeerConnected mocks base method.
-func (m *MockManager) MarkPeerConnected(ctx context.Context, peerKey string, realIP net.IP, accountID string, sessionStartedAt int64, nmap *types.NetworkMap) error {
+func (m *MockManager) MarkPeerConnected(ctx context.Context, peerKey string, accountID string, sessionStartedAt int64, nmap *types.NetworkMap) error {
 	m.ctrl.T.Helper()
-	ret := m.ctrl.Call(m, "MarkPeerConnected", ctx, peerKey, realIP, accountID, sessionStartedAt, nmap)
+	ret := m.ctrl.Call(m, "MarkPeerConnected", ctx, peerKey, accountID, sessionStartedAt, nmap)
 	ret0, _ := ret[0].(error)
 	return ret0
 }

 // MarkPeerConnected indicates an expected call of MarkPeerConnected.
-func (mr *MockManagerMockRecorder) MarkPeerConnected(ctx, peerKey, realIP, accountID, sessionStartedAt, nmap interface{}) *gomock.Call {
+func (mr *MockManagerMockRecorder) MarkPeerConnected(ctx, peerKey, accountID, sessionStartedAt, nmap interface{}) *gomock.Call {
 	mr.mock.ctrl.T.Helper()
-	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "MarkPeerConnected", reflect.TypeOf((*MockManager)(nil).MarkPeerConnected), ctx, peerKey, realIP, accountID, sessionStartedAt, nmap)
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "MarkPeerConnected", reflect.TypeOf((*MockManager)(nil).MarkPeerConnected), ctx, peerKey, accountID, sessionStartedAt, nmap)
 }

 // MarkPeerDisconnected mocks base method.
@@ -1586,17 +1586,17 @@ func (mr *MockManagerMockRecorder) SyncPeer(ctx, sync, accountID interface{}) *g
 }

 // SyncPeerMeta mocks base method.
-func (m *MockManager) SyncPeerMeta(ctx context.Context, peerPubKey string, meta peer.PeerSystemMeta) error {
+func (m *MockManager) SyncPeerMeta(ctx context.Context, peerPubKey string, meta peer.PeerSystemMeta, realIP net.IP) error {
 	m.ctrl.T.Helper()
-	ret := m.ctrl.Call(m, "SyncPeerMeta", ctx, peerPubKey, meta)
+	ret := m.ctrl.Call(m, "SyncPeerMeta", ctx, peerPubKey, meta, realIP)
 	ret0, _ := ret[0].(error)
 	return ret0
 }

 // SyncPeerMeta indicates an expected call of SyncPeerMeta.
-func (mr *MockManagerMockRecorder) SyncPeerMeta(ctx, peerPubKey, meta interface{}) *gomock.Call {
+func (mr *MockManagerMockRecorder) SyncPeerMeta(ctx, peerPubKey, meta, realIP interface{}) *gomock.Call {
 	mr.mock.ctrl.T.Helper()
-	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SyncPeerMeta", reflect.TypeOf((*MockManager)(nil).SyncPeerMeta), ctx, peerPubKey, meta)
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SyncPeerMeta", reflect.TypeOf((*MockManager)(nil).SyncPeerMeta), ctx, peerPubKey, meta, realIP)
 }

 // SyncUserJWTGroups mocks base method.
--- a/management/server/account_test.go
+++ b/management/server/account_test.go
@@ -1836,7 +1836,7 @@ func TestDefaultAccountManager_UpdatePeer_PeerLoginExpiration(t *testing.T) {
 	accountID, err := manager.GetAccountIDByUserID(context.Background(), auth.UserAuth{UserId: userID})
 	require.NoError(t, err, "unable to get the account")

-	err = manager.MarkPeerConnected(context.Background(), key.PublicKey().String(), nil, accountID, time.Now().UTC().UnixNano(), nil)
+	err = manager.MarkPeerConnected(context.Background(), key.PublicKey().String(), accountID, time.Now().UTC().UnixNano(), nil)
 	require.NoError(t, err, "unable to mark peer connected")

 	_, err = manager.UpdateAccountSettings(context.Background(), accountID, userID, &types.Settings{
@@ -1907,7 +1907,7 @@ func TestDefaultAccountManager_MarkPeerConnected_PeerLoginExpiration(t *testing.
 	require.NoError(t, err, "unable to get the account")

 	// when we mark peer as connected, the peer login expiration routine should trigger
-	err = manager.MarkPeerConnected(context.Background(), key.PublicKey().String(), nil, accountID, time.Now().UTC().UnixNano(), nil)
+	err = manager.MarkPeerConnected(context.Background(), key.PublicKey().String(), accountID, time.Now().UTC().UnixNano(), nil)
 	require.NoError(t, err, "unable to mark peer connected")

 	failed := waitTimeout(wg, time.Second)
@@ -1916,6 +1916,117 @@ func TestDefaultAccountManager_MarkPeerConnected_PeerLoginExpiration(t *testing.
 	}
 }

+// TestDefaultAccountManager_MarkPeerDisconnected_SchedulesInactivityExpiration
+// checks that MarkPeerDisconnected calls Schedule on the manager's
+// peerInactivityExpiry scheduler when the peer has InactivityExpirationEnabled
+// set and the account settings have PeerInactivityExpirationEnabled set.
+//
+// The peer is first marked connected with a session token, and the same token
+// is passed to the disconnect. The test fails if no Schedule call is observed
+// within one second.
+func TestDefaultAccountManager_MarkPeerDisconnected_SchedulesInactivityExpiration(t *testing.T) {
+	manager, _, err := createManager(t)
+	require.NoError(t, err, "unable to create account manager")
+
+	accountID, err := manager.GetAccountIDByUserID(context.Background(), auth.UserAuth{UserId: userID})
+	require.NoError(t, err, "unable to create an account")
+
+	key, err := wgtypes.GenerateKey()
+	require.NoError(t, err, "unable to generate WireGuard key")
+	peerPubKey := key.PublicKey().String()
+
+	_, _, _, _, err = manager.AddPeer(context.Background(), "", "", userID, &nbpeer.Peer{
+		Key:                         peerPubKey,
+		Meta:                        nbpeer.PeerSystemMeta{Hostname: "test-peer"},
+		InactivityExpirationEnabled: true,
+	}, false)
+	require.NoError(t, err, "unable to add peer")
+
+	_, err = manager.UpdateAccountSettings(context.Background(), accountID, userID, &types.Settings{
+		PeerLoginExpiration:             time.Hour,
+		PeerLoginExpirationEnabled:      true,
+		PeerInactivityExpiration:        time.Hour,
+		PeerInactivityExpirationEnabled: true,
+		Extra:                           &types.ExtraSettings{},
+	})
+	require.NoError(t, err, "expecting to update account settings successfully but got error")
+
+	// Establish a session so the matching-token disconnect is actually applied.
+	streamStartTime := time.Now().UTC()
+	err = manager.MarkPeerConnected(context.Background(), peerPubKey, accountID, streamStartTime.UnixNano(), nil)
+	require.NoError(t, err, "unable to mark peer connected")
+
+	// Install the mock only now, so the assertion observes the disconnect, not
+	// the earlier connect.
+	// The channel is buffered and the send is non-blocking, so repeated
+	// Schedule calls never block the code under test.
+	scheduled := make(chan struct{}, 1)
+	manager.peerInactivityExpiry = &MockScheduler{
+		CancelFunc: func(ctx context.Context, IDs []string) {},
+		ScheduleFunc: func(ctx context.Context, in time.Duration, ID string, job func() (nextRunIn time.Duration, reschedule bool)) {
+			select {
+			case scheduled <- struct{}{}:
+			default:
+			}
+		},
+	}
+
+	err = manager.MarkPeerDisconnected(context.Background(), peerPubKey, accountID, streamStartTime.UnixNano())
+	require.NoError(t, err, "unable to mark peer disconnected")
+
+	select {
+	case <-scheduled:
+		// expected: disconnect re-armed the inactivity expiry timer
+	case <-time.After(time.Second):
+		t.Fatal("expected inactivity expiration to be rescheduled when an eligible peer disconnects")
+	}
+}
+
+// TestDefaultAccountManager_MarkPeerDisconnected_SkipsInactivityExpirationWhenDisabled
+// checks that MarkPeerDisconnected does not call Schedule on the manager's
+// peerInactivityExpiry scheduler when the account settings have
+// PeerInactivityExpirationEnabled set to false, even though the peer itself
+// has InactivityExpirationEnabled set.
+//
+// The test waits 200ms for a Schedule call and fails if one arrives.
+func TestDefaultAccountManager_MarkPeerDisconnected_SkipsInactivityExpirationWhenDisabled(t *testing.T) {
+	manager, _, err := createManager(t)
+	require.NoError(t, err, "unable to create account manager")
+
+	accountID, err := manager.GetAccountIDByUserID(context.Background(), auth.UserAuth{UserId: userID})
+	require.NoError(t, err, "unable to create an account")
+
+	key, err := wgtypes.GenerateKey()
+	require.NoError(t, err, "unable to generate WireGuard key")
+	peerPubKey := key.PublicKey().String()
+
+	_, _, _, _, err = manager.AddPeer(context.Background(), "", "", userID, &nbpeer.Peer{
+		Key:                         peerPubKey,
+		Meta:                        nbpeer.PeerSystemMeta{Hostname: "test-peer"},
+		InactivityExpirationEnabled: true,
+	}, false)
+	require.NoError(t, err, "unable to add peer")
+
+	// Peer is eligible (SSO + inactivity enabled) but the account-level setting
+	// stays disabled, so disconnect must not schedule anything.
+	_, err = manager.UpdateAccountSettings(context.Background(), accountID, userID, &types.Settings{
+		PeerLoginExpiration:             time.Hour,
+		PeerLoginExpirationEnabled:      true,
+		PeerInactivityExpiration:        time.Hour,
+		PeerInactivityExpirationEnabled: false,
+		Extra:                           &types.ExtraSettings{},
+	})
+	require.NoError(t, err, "expecting to update account settings successfully but got error")
+
+	// Connect first so the disconnect below carries a matching session token.
+	streamStartTime := time.Now().UTC()
+	err = manager.MarkPeerConnected(context.Background(), peerPubKey, accountID, streamStartTime.UnixNano(), nil)
+	require.NoError(t, err, "unable to mark peer connected")
+
+	// The mock is installed after the connect, so only Schedule calls made
+	// from here on are recorded.
+	scheduled := make(chan struct{}, 1)
+	manager.peerInactivityExpiry = &MockScheduler{
+		CancelFunc: func(ctx context.Context, IDs []string) {},
+		ScheduleFunc: func(ctx context.Context, in time.Duration, ID string, job func() (nextRunIn time.Duration, reschedule bool)) {
+			select {
+			case scheduled <- struct{}{}:
+			default:
+			}
+		},
+	}
+
+	err = manager.MarkPeerDisconnected(context.Background(), peerPubKey, accountID, streamStartTime.UnixNano())
+	require.NoError(t, err, "unable to mark peer disconnected")
+
+	select {
+	case <-scheduled:
+		t.Fatal("inactivity expiration must not be scheduled while the account-level setting is disabled")
+	case <-time.After(200 * time.Millisecond):
+		// expected: nothing scheduled
+	}
+}
+
 func TestDefaultAccountManager_OnPeerDisconnected_LastSeenCheck(t *testing.T) {
 	manager, _, err := createManager(t)
 	require.NoError(t, err, "unable to create account manager")
@@ -1935,7 +2046,7 @@ func TestDefaultAccountManager_OnPeerDisconnected_LastSeenCheck(t *testing.T) {

 	t.Run("disconnect peer when session token matches", func(t *testing.T) {
 		streamStartTime := time.Now().UTC()
-		err = manager.MarkPeerConnected(context.Background(), peerPubKey, nil, accountID, streamStartTime.UnixNano(), nil)
+		err = manager.MarkPeerConnected(context.Background(), peerPubKey, accountID, streamStartTime.UnixNano(), nil)
 		require.NoError(t, err, "unable to mark peer connected")

 		peer, err := manager.Store.GetPeerByPeerPubKey(context.Background(), store.LockingStrengthNone, peerPubKey)
@@ -1956,7 +2067,7 @@ func TestDefaultAccountManager_OnPeerDisconnected_LastSeenCheck(t *testing.T) {
 	t.Run("skip disconnect when stored session is newer (zombie stream protection)", func(t *testing.T) {
 		// Newer stream wins on connect (sets SessionStartedAt = now ns).
 		streamStartTime := time.Now().UTC()
-		err = manager.MarkPeerConnected(context.Background(), peerPubKey, nil, accountID, streamStartTime.UnixNano(), nil)
+		err = manager.MarkPeerConnected(context.Background(), peerPubKey, accountID, streamStartTime.UnixNano(), nil)
 		require.NoError(t, err, "unable to mark peer connected")

 		peer, err := manager.Store.GetPeerByPeerPubKey(context.Background(), store.LockingStrengthNone, peerPubKey)
@@ -1980,7 +2091,7 @@ func TestDefaultAccountManager_OnPeerDisconnected_LastSeenCheck(t *testing.T) {

 	t.Run("skip stale connect when stored session is newer (blocked goroutine protection)", func(t *testing.T) {
 		node2SyncTime := time.Now().UTC()
-		err = manager.MarkPeerConnected(context.Background(), peerPubKey, nil, accountID, node2SyncTime.UnixNano(), nil)
+		err = manager.MarkPeerConnected(context.Background(), peerPubKey, accountID, node2SyncTime.UnixNano(), nil)
 		require.NoError(t, err, "node 2 should connect peer")

 		peer, err := manager.Store.GetPeerByPeerPubKey(context.Background(), store.LockingStrengthNone, peerPubKey)
@@ -1990,7 +2101,7 @@ func TestDefaultAccountManager_OnPeerDisconnected_LastSeenCheck(t *testing.T) {
 			"SessionStartedAt should equal node2SyncTime token")

 		node1StaleSyncTime := node2SyncTime.Add(-1 * time.Minute)
-		err = manager.MarkPeerConnected(context.Background(), peerPubKey, nil, accountID, node1StaleSyncTime.UnixNano(), nil)
+		err = manager.MarkPeerConnected(context.Background(), peerPubKey, accountID, node1StaleSyncTime.UnixNano(), nil)
 		require.NoError(t, err, "stale connect should not return error")

 		peer, err = manager.Store.GetPeerByPeerPubKey(context.Background(), store.LockingStrengthNone, peerPubKey)
@@ -2052,7 +2163,7 @@ func TestDefaultAccountManager_MarkPeerConnected_ConcurrentRace(t *testing.T) {
 			defer done.Done()
 			ready.Done()
 			start.Wait()
-			errs <- manager.MarkPeerConnected(context.Background(), peerPubKey, nil, accountID, token, nil)
+			errs <- manager.MarkPeerConnected(context.Background(), peerPubKey, accountID, token, nil)
 		}()
 	}

@@ -2093,7 +2204,7 @@ func TestDefaultAccountManager_UpdateAccountSettings_PeerLoginExpiration(t *test
 	account, err := manager.Store.GetAccount(context.Background(), accountID)
 	require.NoError(t, err, "unable to get the account")

-	err = manager.MarkPeerConnected(context.Background(), key.PublicKey().String(), nil, accountID, time.Now().UTC().UnixNano(), nil)
+	err = manager.MarkPeerConnected(context.Background(), key.PublicKey().String(), accountID, time.Now().UTC().UnixNano(), nil)
 	require.NoError(t, err, "unable to mark peer connected")

 	wg := &sync.WaitGroup{}
--- a/management/server/mock_server/account_mock.go
+++ b/management/server/mock_server/account_mock.go
@@ -39,7 +39,7 @@ type MockAccountManager struct {
 	GetUserFromUserAuthFunc               func(ctx context.Context, userAuth auth.UserAuth) (*types.User, error)
 	ListUsersFunc                         func(ctx context.Context, accountID string) ([]*types.User, error)
 	GetPeersFunc                          func(ctx context.Context, accountID, userID, nameFilter, ipFilter string) ([]*nbpeer.Peer, error)
-	MarkPeerConnectedFunc                 func(ctx context.Context, peerKey string, realIP net.IP, accountID string, sessionStartedAt int64, nmap *types.NetworkMap) error
+	MarkPeerConnectedFunc                 func(ctx context.Context, peerKey string, accountID string, sessionStartedAt int64, nmap *types.NetworkMap) error
 	MarkPeerDisconnectedFunc              func(ctx context.Context, peerKey string, accountID string, sessionStartedAt int64) error
 	SyncAndMarkPeerFunc                   func(ctx context.Context, accountID string, peerPubKey string, meta nbpeer.PeerSystemMeta, realIP net.IP, syncTime time.Time) (*nbpeer.Peer, *types.NetworkMap, []*posture.Checks, int64, error)
 	DeletePeerFunc                        func(ctx context.Context, accountID, peerKey, userID string) error
@@ -114,7 +114,7 @@ type MockAccountManager struct {
 	GetIdpManagerFunc                     func() idp.Manager
 	UpdateIntegratedValidatorFunc         func(ctx context.Context, accountID, userID, validator string, groups []string) error
 	GroupValidationFunc                   func(ctx context.Context, accountId string, groups []string) (bool, error)
-	SyncPeerMetaFunc                      func(ctx context.Context, peerPubKey string, meta nbpeer.PeerSystemMeta) error
+	SyncPeerMetaFunc                      func(ctx context.Context, peerPubKey string, meta nbpeer.PeerSystemMeta, realIP net.IP) error
 	FindExistingPostureCheckFunc          func(accountID string, checks *posture.ChecksDefinition) (*posture.Checks, error)
 	GetAccountIDForPeerKeyFunc            func(ctx context.Context, peerKey string) (string, error)
 	GetAccountByIDFunc                    func(ctx context.Context, accountID string, userID string) (*types.Account, error)
@@ -345,9 +345,9 @@ func (am *MockAccountManager) GetAccountIDByUserID(ctx context.Context, userAuth
 }

 // MarkPeerConnected mock implementation of MarkPeerConnected from server.AccountManager interface
-func (am *MockAccountManager) MarkPeerConnected(ctx context.Context, peerKey string, realIP net.IP, accountID string, sessionStartedAt int64, nmap *types.NetworkMap) error {
+func (am *MockAccountManager) MarkPeerConnected(ctx context.Context, peerKey string, accountID string, sessionStartedAt int64, nmap *types.NetworkMap) error {
 	if am.MarkPeerConnectedFunc != nil {
-		return am.MarkPeerConnectedFunc(ctx, peerKey, realIP, accountID, sessionStartedAt, nmap)
+		return am.MarkPeerConnectedFunc(ctx, peerKey, accountID, sessionStartedAt, nmap)
 	}
 	return status.Errorf(codes.Unimplemented, "method MarkPeerConnected is not implemented")
 }
@@ -975,9 +975,9 @@ func (am *MockAccountManager) GroupValidation(ctx context.Context, accountId str
 }

 // SyncPeerMeta mocks SyncPeerMeta of the AccountManager interface
-func (am *MockAccountManager) SyncPeerMeta(ctx context.Context, peerPubKey string, meta nbpeer.PeerSystemMeta) error {
+func (am *MockAccountManager) SyncPeerMeta(ctx context.Context, peerPubKey string, meta nbpeer.PeerSystemMeta, realIP net.IP) error {
 	if am.SyncPeerMetaFunc != nil {
-		return am.SyncPeerMetaFunc(ctx, peerPubKey, meta)
+		return am.SyncPeerMetaFunc(ctx, peerPubKey, meta, realIP)
 	}
 	return status.Errorf(codes.Unimplemented, "method SyncPeerMeta is not implemented")
 }
--- a/management/server/peer.go
+++ b/management/server/peer.go
@@ -74,7 +74,7 @@ func (am *DefaultAccountManager) GetPeers(ctx context.Context, accountID, userID
 //
 // Disconnects use MarkPeerDisconnected and require the session to match
 // exactly; see PeerStatus.SessionStartedAt for the protocol.
-func (am *DefaultAccountManager) MarkPeerConnected(ctx context.Context, peerPubKey string, realIP net.IP, accountID string, sessionStartedAt int64, nmap *types.NetworkMap) error {
+func (am *DefaultAccountManager) MarkPeerConnected(ctx context.Context, peerPubKey string, accountID string, sessionStartedAt int64, nmap *types.NetworkMap) error {
 	start := time.Now()
 	defer func() {
 		am.metrics.AccountManagerMetrics().RecordPeerStatusUpdateDuration(telemetry.PeerStatusConnect, time.Since(start))
@@ -102,10 +102,6 @@ func (am *DefaultAccountManager) MarkPeerConnected(ctx context.Context, peerPubK
 	}
 	am.metrics.AccountManagerMetrics().CountPeerStatusUpdate(telemetry.PeerStatusConnect, telemetry.PeerStatusApplied)

-	if am.geo != nil && realIP != nil {
-		am.updatePeerLocationIfChanged(ctx, accountID, peer, realIP)
-	}
-
 	if err = am.schedulePeerExpirations(ctx, accountID, peer); err != nil {
 		return err
 	}
@@ -192,27 +188,40 @@ func (am *DefaultAccountManager) MarkPeerDisconnected(ctx context.Context, peerP
 		}
 	}

+	if peer.AddedWithSSOLogin() && peer.InactivityExpirationEnabled {
+		settings, err := am.Store.GetAccountSettings(ctx, store.LockingStrengthNone, accountID)
+		if err != nil {
+			log.WithContext(ctx).Warnf("failed getting account settings to schedule inactivity expiration for peer %s: %v", peer.ID, err)
+		} else if settings.PeerInactivityExpirationEnabled {
+			am.checkAndSchedulePeerInactivityExpiration(ctx, accountID)
+		}
+	}
+
 	return nil
 }

-// updatePeerLocationIfChanged refreshes the geolocation on a separate
-// row update, only when the connection IP actually changed. Geo lookups
-// are expensive so we skip same-IP reconnects.
-func (am *DefaultAccountManager) updatePeerLocationIfChanged(ctx context.Context, accountID string, peer *nbpeer.Peer, realIP net.IP) {
+// resolvePeerLocation looks up the geo location for realIP, returning nil when
+// there is nothing to apply: geo disabled, no real IP, the IP is unchanged from
+// what the peer already has, or the lookup failed. Geo lookups are skipped on
+// same-IP reconnects since they are comparatively expensive. The returned value
+// is applied by Peer.UpdateMetaIfNew so the change is persisted by its peer save.
+func (am *DefaultAccountManager) resolvePeerLocation(ctx context.Context, peer *nbpeer.Peer, realIP net.IP) *nbpeer.Location {
+	if am.geo == nil || realIP == nil {
+		return nil
+	}
 	if peer.Location.ConnectionIP != nil && peer.Location.ConnectionIP.Equal(realIP) {
-		return
+		return nil
 	}
 	location, err := am.geo.Lookup(realIP)
 	if err != nil {
 		log.WithContext(ctx).Warnf("failed to get location for peer %s realip: [%s]: %v", peer.ID, realIP.String(), err)
-		return
+		return nil
 	}
-	peer.Location.ConnectionIP = realIP
-	peer.Location.CountryCode = location.Country.ISOCode
-	peer.Location.CityName = location.City.Names.En
-	peer.Location.GeoNameID = location.City.GeonameID
-	if err := am.Store.SavePeerLocation(ctx, accountID, peer); err != nil {
-		log.WithContext(ctx).Warnf("could not store location for peer %s: %s", peer.ID, err)
+	return &nbpeer.Location{
+		ConnectionIP: realIP,
+		CountryCode:  location.Country.ISOCode,
+		CityName:     location.City.Names.En,
+		GeoNameID:    location.City.GeonameID,
 	}
 }

@@ -980,7 +989,8 @@ func getPeerIPDNSLabel(ip netip.Addr, peerHostName string) (string, error) {
 // SyncPeer checks whether peer is eligible for receiving NetworkMap (authenticated) and returns its NetworkMap if eligible
 func (am *DefaultAccountManager) SyncPeer(ctx context.Context, sync types.PeerSync, accountID string) (*nbpeer.Peer, *types.NetworkMap, []*posture.Checks, int64, error) {
 	var peer *nbpeer.Peer
-	var updated, versionChanged, ipv6CapabilityChanged bool
+	var ipv6CapabilityChanged bool
+	var metaDiff nbpeer.MetaDiff
 	var err error

 	settings, err := am.Store.GetAccountSettings(ctx, store.LockingStrengthNone, accountID)
@@ -1010,9 +1020,10 @@ func (am *DefaultAccountManager) SyncPeer(ctx context.Context, sync types.PeerSy
 		}

 		oldHasIPv6Cap := peer.HasCapability(nbpeer.PeerCapabilityIPv6Overlay)
-		updated, versionChanged = peer.UpdateMetaIfNew(ctx, sync.Meta)
+		newLocation := am.resolvePeerLocation(ctx, peer, sync.RealIP)
+		metaDiff = peer.UpdateMetaIfNew(ctx, sync.Meta, newLocation)
 		ipv6CapabilityChanged = oldHasIPv6Cap != peer.HasCapability(nbpeer.PeerCapabilityIPv6Overlay)
-		if updated {
+		if metaDiff.Updated() {
 			am.metrics.AccountManagerMetrics().CountPeerMetUpdate()
 			log.WithContext(ctx).Tracef("peer %s metadata updated", peer.ID)
 			if err = transaction.SavePeer(ctx, accountID, peer); err != nil {
@@ -1040,9 +1051,10 @@ func (am *DefaultAccountManager) SyncPeer(ctx context.Context, sync types.PeerSy
 		return nil, nil, nil, 0, err
 	}

-	if isStatusChanged || sync.UpdateAccountPeers || ipv6CapabilityChanged || (updated && (len(resPostureChecks) > 0 || versionChanged)) {
+	metaDiffAffectsPosture := posture.AffectsPosture(&metaDiff, resPostureChecks)
+	if isStatusChanged || sync.UpdateAccountPeers || ipv6CapabilityChanged || metaDiffAffectsPosture || metaDiff.VersionChanged || metaDiff.Hostname {
 		changedPeerIDs := []string{peer.ID}
-		affectedPeerIDs := am.syncPeerAffectedPeers(ctx, accountID, peer.ID, nmap, peerNotValid, updated, len(resPostureChecks) > 0)
+		affectedPeerIDs := am.syncPeerAffectedPeers(ctx, accountID, peer.ID, nmap, peerNotValid, metaDiffAffectsPosture)
 		if err = am.networkMapController.OnPeersUpdated(ctx, accountID, changedPeerIDs, affectedPeerIDs); err != nil {
 			return nil, nil, nil, 0, fmt.Errorf("notify network map controller of peer update: %w", err)
 		}
@@ -1059,8 +1071,8 @@ func (am *DefaultAccountManager) SyncPeer(ctx context.Context, sync types.PeerSy
 // metadata change that flips a posture result removes this peer from others'
 // maps asymmetrically; that case (and an invalid peer, whose map is empty) falls
 // back to the resolver.
-func (am *DefaultAccountManager) syncPeerAffectedPeers(ctx context.Context, accountID, peerID string, nmap *types.NetworkMap, peerNotValid, metaUpdated, hasPostureChecks bool) []string {
-	if peerNotValid || (metaUpdated && hasPostureChecks) {
+func (am *DefaultAccountManager) syncPeerAffectedPeers(ctx context.Context, accountID, peerID string, nmap *types.NetworkMap, peerNotValid, metaChangeAffectedPosture bool) []string {
+	if peerNotValid || metaChangeAffectedPosture {
 		return am.resolveAffectedPeersForPeerChanges(ctx, am.Store, accountID, []string{peerID})
 	}
 	return affectedPeerIDsFromNetworkMap(nmap, peerID)
@@ -1170,7 +1182,7 @@ func (am *DefaultAccountManager) LoginPeer(ctx context.Context, login types.Peer
 	}

 	// This is needed to keep in memory for the peer config. Otherwise browser client will end in a retry loop
-	peer.UpdateMetaIfNew(ctx, login.Meta)
+	peer.Meta = login.Meta

 	peerGroupIDs, err = getPeerGroupIDs(ctx, am.Store, accountID, peer.ID)
 	if err != nil {
--- a/management/server/peer/peer.go
+++ b/management/server/peer/peer.go
@@ -256,14 +256,18 @@ func (p *Peer) Copy() *Peer {
 	}
 }

-// UpdateMetaIfNew updates peer's system metadata if new information is provided
-// returns true if meta was updated, false otherwise
-func (p *Peer) UpdateMetaIfNew(ctx context.Context, meta PeerSystemMeta) (updated, versionChanged bool) {
+// UpdateMetaIfNew updates peer's system metadata and connection geo location if
+// new information is provided. newLocation is the geo location resolved from the
+// peer's current connection IP, or nil when there is nothing to apply (geo
+// disabled, no real IP, or the IP is unchanged); the caller owns the expensive
+// lookup and the same-IP guard. It returns a MetaDiff describing what changed;
+// diff.Updated() reports whether the peer needs to be persisted.
+func (p *Peer) UpdateMetaIfNew(ctx context.Context, meta PeerSystemMeta, newLocation *Location) MetaDiff {
 	if meta.isEmpty() {
-		return updated, versionChanged
+		return MetaDiff{}
 	}

-	versionChanged = p.Meta.WtVersion != meta.WtVersion
+	versionChanged := p.Meta.WtVersion != meta.WtVersion

 	// Avoid overwriting UIVersion if the update was triggered sole by the CLI client
 	if meta.UIVersion == "" {
@@ -272,97 +276,177 @@ func (p *Peer) UpdateMetaIfNew(ctx context.Context, meta PeerSystemMeta) (update

 	oldVersion := p.Meta.WtVersion

-	diff := metaDiff(p.Meta, meta)
-	if len(diff) != 0 {
+	diff := diffMeta(p.Meta, meta)
+	if diff.Any() {
 		p.Meta = meta
-		updated = true
+	}
+	diff.VersionChanged = versionChanged
+
+	locationInfo := ""
+	if newLocation != nil {
+		p.Location = *newLocation
+		diff.LocationChanged = true
+		locationInfo = fmt.Sprintf("location changed to %s, ", newLocation.ConnectionIP)
 	}

 	versionInfo := ""
-	if versionChanged {
+	if diff.VersionChanged {
 		versionInfo = fmt.Sprintf("version changed: %s -> %s, ", oldVersion, meta.WtVersion)
 	}

-	if len(diff) > 0 || versionChanged {
+	if diff.Any() || diff.VersionChanged || diff.LocationChanged {
 		log.WithContext(ctx).
-			Debugf("peer meta updated, %s%d field(s) changed: %s", versionInfo, len(diff), strings.Join(diff, ", "))
+			Debugf("peer meta updated, %s%s%d field(s) changed: %s", versionInfo, locationInfo, len(diff.Changed), strings.Join(diff.Changed, ", "))
 	}

-	return updated, versionChanged
+	return diff
+}
+
+// MetaDiff records which PeerSystemMeta fields differ between two metas. Each bool
+// maps to a single struct field, except Environment, which is split into Cloud and
+// Platform. Changed holds the human-readable `field: <old> -> <new>` entries so the
+// existing log line and isEqual can be derived from the same comparison.
+//
+// VersionChanged and LocationChanged sit outside the per-meta-field set:
+// VersionChanged tracks the NetBird client version, WtVersion (compared before
+// the UIVersion fixup, to signal client upgrades) and LocationChanged tracks the
+// peer's connection geo location, which lives on Peer rather than PeerSystemMeta.
+// Neither contributes an entry to Changed, so the field-coverage accounting stays
+// driven purely by the PeerSystemMeta comparison.
+type MetaDiff struct {
+	Hostname            bool
+	GoOS                bool
+	Kernel              bool
+	KernelVersion       bool
+	Core                bool
+	Platform            bool
+	OS                  bool
+	OSVersion           bool
+	WtVersion           bool
+	UIVersion           bool
+	SystemSerialNumber  bool
+	SystemProductName   bool
+	SystemManufacturer  bool
+	EnvironmentCloud    bool
+	EnvironmentPlatform bool
+	Flags               bool
+	Capabilities        bool
+	NetworkAddresses    bool
+	Files               bool
+
+	VersionChanged  bool
+	LocationChanged bool
+
+	Changed []string
+}
+
+// Any reports whether any PeerSystemMeta field changed.
+func (d MetaDiff) Any() bool {
+	return len(d.Changed) != 0
+}
+
+// Updated reports whether the peer needs to be persisted: any meta field changed,
+// the geo location changed, or the version flag is set. A version change is
+// normally also reflected in the WtVersion meta field and thus already in Any().
+func (d MetaDiff) Updated() bool {
+	return d.Any() || d.LocationChanged || d.VersionChanged
+}

-// metaDiff returns a human-readable list of the fields that differ between the
-// old and new meta, each formatted as `field: <old> -> <new>`. It is the single
-// source of truth for meta comparison: isEqual reports equality as an empty
-// diff, so the log line can never disagree with the change decision. Slices are
-// cloned before sorting, so callers' meta is not mutated.
 func metaDiff(oldMeta, newMeta PeerSystemMeta) []string {
-	var diff []string
+	return diffMeta(oldMeta, newMeta).Changed
+}
+
+// diffMeta compares two metas field by field, returning both a per-field flag set
+// (for callers that need to know exactly what changed, e.g. matching against
+// posture checks) and the human-readable Changed list. It is the single source of
+// truth for meta comparison: isEqual reports equality as an empty diff, so the log
+// line, the change decision, and the flags can never disagree.
+func diffMeta(oldMeta, newMeta PeerSystemMeta) MetaDiff {
+	var d MetaDiff
 	add := func(field string, oldVal, newVal any) {
-		diff = append(diff, fmt.Sprintf("%s: %v -> %v", field, oldVal, newVal))
+		d.Changed = append(d.Changed, fmt.Sprintf("%s: %v -> %v", field, oldVal, newVal))
 	}

 	if oldMeta.Hostname != newMeta.Hostname {
+		d.Hostname = true
 		add("hostname", oldMeta.Hostname, newMeta.Hostname)
 	}
 	if oldMeta.GoOS != newMeta.GoOS {
+		d.GoOS = true
 		add("goos", oldMeta.GoOS, newMeta.GoOS)
 	}
 	if oldMeta.Kernel != newMeta.Kernel {
+		d.Kernel = true
 		add("kernel", oldMeta.Kernel, newMeta.Kernel)
 	}
 	if oldMeta.KernelVersion != newMeta.KernelVersion {
+		d.KernelVersion = true
 		add("kernel_version", oldMeta.KernelVersion, newMeta.KernelVersion)
 	}
 	if oldMeta.Core != newMeta.Core {
+		d.Core = true
 		add("core", oldMeta.Core, newMeta.Core)
 	}
 	if oldMeta.Platform != newMeta.Platform {
+		d.Platform = true
 		add("platform", oldMeta.Platform, newMeta.Platform)
 	}
 	if oldMeta.OS != newMeta.OS {
+		d.OS = true
 		add("os", oldMeta.OS, newMeta.OS)
 	}
 	if oldMeta.OSVersion != newMeta.OSVersion {
+		d.OSVersion = true
 		add("os_version", oldMeta.OSVersion, newMeta.OSVersion)
 	}
 	if oldMeta.WtVersion != newMeta.WtVersion {
+		d.WtVersion = true
 		add("wt_version", oldMeta.WtVersion, newMeta.WtVersion)
 	}
 	if oldMeta.UIVersion != newMeta.UIVersion {
+		d.UIVersion = true
 		add("ui_version", oldMeta.UIVersion, newMeta.UIVersion)
 	}
 	if oldMeta.SystemSerialNumber != newMeta.SystemSerialNumber {
+		d.SystemSerialNumber = true
 		add("system_serial_number", oldMeta.SystemSerialNumber, newMeta.SystemSerialNumber)
 	}
 	if oldMeta.SystemProductName != newMeta.SystemProductName {
+		d.SystemProductName = true
 		add("system_product_name", oldMeta.SystemProductName, newMeta.SystemProductName)
 	}
 	if oldMeta.SystemManufacturer != newMeta.SystemManufacturer {
+		d.SystemManufacturer = true
 		add("system_manufacturer", oldMeta.SystemManufacturer, newMeta.SystemManufacturer)
 	}
 	if oldMeta.Environment.Cloud != newMeta.Environment.Cloud {
+		d.EnvironmentCloud = true
 		add("environment_cloud", oldMeta.Environment.Cloud, newMeta.Environment.Cloud)
 	}
 	if oldMeta.Environment.Platform != newMeta.Environment.Platform {
+		d.EnvironmentPlatform = true
 		add("environment_platform", oldMeta.Environment.Platform, newMeta.Environment.Platform)
 	}
 	if !oldMeta.Flags.isEqual(newMeta.Flags) {
+		d.Flags = true
 		add("flags", fmt.Sprintf("%+v", oldMeta.Flags), fmt.Sprintf("%+v", newMeta.Flags))
 	}
 	if !capabilitiesEqual(oldMeta.Capabilities, newMeta.Capabilities) {
+		d.Capabilities = true
 		add("capabilities", oldMeta.Capabilities, newMeta.Capabilities)
 	}

 	if !sameMultiset(oldMeta.NetworkAddresses, newMeta.NetworkAddresses) {
+		d.NetworkAddresses = true
 		add("network_addresses", fmt.Sprintf("%v", oldMeta.NetworkAddresses), fmt.Sprintf("%v", newMeta.NetworkAddresses))
 	}

 	if !sameMultiset(oldMeta.Files, newMeta.Files) {
+		d.Files = true
 		add("files", fmt.Sprintf("%v", oldMeta.Files), fmt.Sprintf("%v", newMeta.Files))
 	}

-	return diff
+	return d
 }

 // sameMultiset reports whether two slices contain the same elements with the
--- a/management/server/posture/checks.go
+++ b/management/server/posture/checks.go
@@ -7,6 +7,7 @@ import (
 	"regexp"

 	"github.com/hashicorp/go-version"
+
 	nbpeer "github.com/netbirdio/netbird/management/server/peer"
 	"github.com/netbirdio/netbird/shared/management/http/api"
 	"github.com/netbirdio/netbird/shared/management/status"
@@ -51,6 +52,34 @@ type Checks struct {
 	Checks ChecksDefinition `gorm:"serializer:json"`
 }

+// AffectsPosture reports whether the peer metadata changes described by diff can
+// alter the outcome of any of the given posture checks. It maps each check kind to
+// the metadata fields it inspects, so an unrelated change (e.g. a hostname update)
+// does not force a posture re-evaluation.
+func AffectsPosture(diff *nbpeer.MetaDiff, checks []*Checks) bool {
+	if diff == nil {
+		return false
+	}
+	for _, c := range checks {
+		if c.Checks.ProcessCheck != nil && diff.Files {
+			return true
+		}
+		if c.Checks.OSVersionCheck != nil && (diff.OSVersion || diff.OS || diff.KernelVersion) {
+			return true
+		}
+		if c.Checks.NBVersionCheck != nil && diff.WtVersion {
+			return true
+		}
+		if c.Checks.GeoLocationCheck != nil && diff.LocationChanged {
+			return true
+		}
+		if c.Checks.PeerNetworkRangeCheck != nil && diff.NetworkAddresses {
+			return true
+		}
+	}
+	return false
+}
+
 // ChecksDefinition contains definition of actual check
 type ChecksDefinition struct {
 	NBVersionCheck        *NBVersionCheck        `json:",omitempty"`
--- a/management/server/store/sql_store.go
+++ b/management/server/store/sql_store.go
@@ -581,28 +581,6 @@ func (s *SqlStore) MarkPeerDisconnectedIfSameSession(ctx context.Context, accoun
 	return result.RowsAffected > 0, nil
 }

-func (s *SqlStore) SavePeerLocation(ctx context.Context, accountID string, peerWithLocation *nbpeer.Peer) error {
-	// To maintain data integrity, we create a copy of the peer's location to prevent unintended updates to other fields.
-	var peerCopy nbpeer.Peer
-	// Since the location field has been migrated to JSON serialization,
-	// updating the struct ensures the correct data format is inserted into the database.
-	peerCopy.Location = peerWithLocation.Location
-
-	result := s.db.Model(&nbpeer.Peer{}).
-		Where(accountAndIDQueryCondition, accountID, peerWithLocation.ID).
-		Updates(peerCopy)
-
-	if result.Error != nil {
-		return status.Errorf(status.Internal, "failed to save peer locations to store: %v", result.Error)
-	}
-
-	if result.RowsAffected == 0 {
-		return status.Errorf(status.NotFound, peerNotFoundFMT, peerWithLocation.ID)
-	}
-
-	return nil
-}
-
 // ApproveAccountPeers marks all peers that currently require approval in the given account as approved.
 func (s *SqlStore) ApproveAccountPeers(ctx context.Context, accountID string) (int, error) {
 	result := s.db.Model(&nbpeer.Peer{}).
--- a/management/server/store/sql_store_test.go
+++ b/management/server/store/sql_store_test.go
@@ -618,56 +618,6 @@ func TestSqlStore_SavePeerStatus(t *testing.T) {
 	assert.WithinDurationf(t, newStatus.LastSeen, actual.LastSeen.UTC(), time.Millisecond, "LastSeen should be equal")
 }

-func TestSqlStore_SavePeerLocation(t *testing.T) {
-	store, cleanUp, err := NewTestStoreFromSQL(context.Background(), "../testdata/store.sql", t.TempDir())
-	t.Cleanup(cleanUp)
-	assert.NoError(t, err)
-
-	account, err := store.GetAccount(context.Background(), "bf1c8084-ba50-4ce7-9439-34653001fc3b")
-	require.NoError(t, err)
-
-	peer := &nbpeer.Peer{
-		AccountID: account.Id,
-		ID:        "testpeer",
-		Location: nbpeer.Location{
-			ConnectionIP: net.ParseIP("0.0.0.0"),
-			CountryCode:  "YY",
-			CityName:     "City",
-			GeoNameID:    1,
-		},
-		CreatedAt: time.Now().UTC(),
-		Meta:      nbpeer.PeerSystemMeta{},
-	}
-	// error is expected as peer is not in store yet
-	err = store.SavePeerLocation(context.Background(), account.Id, peer)
-	assert.Error(t, err)
-
-	account.Peers[peer.ID] = peer
-	err = store.SaveAccount(context.Background(), account)
-	require.NoError(t, err)
-
-	peer.Location.ConnectionIP = net.ParseIP("35.1.1.1")
-	peer.Location.CountryCode = "DE"
-	peer.Location.CityName = "Berlin"
-	peer.Location.GeoNameID = 2950159
-
-	err = store.SavePeerLocation(context.Background(), account.Id, account.Peers[peer.ID])
-	assert.NoError(t, err)
-
-	account, err = store.GetAccount(context.Background(), account.Id)
-	require.NoError(t, err)
-
-	actual := account.Peers[peer.ID].Location
-	assert.Equal(t, peer.Location, actual)
-
-	peer.ID = "non-existing-peer"
-	err = store.SavePeerLocation(context.Background(), account.Id, peer)
-	assert.Error(t, err)
-	parsedErr, ok := status.FromError(err)
-	require.True(t, ok)
-	require.Equal(t, status.NotFound, parsedErr.Type(), "should return not found error")
-}
-
 func Test_TestGetAccountByPrivateDomain(t *testing.T) {
 	if runtime.GOOS == "windows" {
 		t.Skip("The SQLite store is not properly supported by Windows yet")
--- a/management/server/store/store.go
+++ b/management/server/store/store.go
@@ -185,7 +185,6 @@ type Store interface {
 	// recorded by the database. Returns true when the update happened,
 	// false when a newer session has taken over.
 	MarkPeerDisconnectedIfSameSession(ctx context.Context, accountID, peerID string, sessionStartedAt int64) (bool, error)
-	SavePeerLocation(ctx context.Context, accountID string, peer *nbpeer.Peer) error
 	ApproveAccountPeers(ctx context.Context, accountID string) (int, error)
 	DeletePeer(ctx context.Context, accountID string, peerID string) error

--- a/management/server/store/store_mock.go
+++ b/management/server/store/store_mock.go
@@ -2968,20 +2968,6 @@ func (mr *MockStoreMockRecorder) SavePeer(ctx, accountID, peer interface{}) *gom
 	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SavePeer", reflect.TypeOf((*MockStore)(nil).SavePeer), ctx, accountID, peer)
 }

-// SavePeerLocation mocks base method.
-func (m *MockStore) SavePeerLocation(ctx context.Context, accountID string, peer *peer.Peer) error {
-	m.ctrl.T.Helper()
-	ret := m.ctrl.Call(m, "SavePeerLocation", ctx, accountID, peer)
-	ret0, _ := ret[0].(error)
-	return ret0
-}
-
-// SavePeerLocation indicates an expected call of SavePeerLocation.
-func (mr *MockStoreMockRecorder) SavePeerLocation(ctx, accountID, peer interface{}) *gomock.Call {
-	mr.mock.ctrl.T.Helper()
-	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SavePeerLocation", reflect.TypeOf((*MockStore)(nil).SavePeerLocation), ctx, accountID, peer)
-}
-
 // SavePeerStatus mocks base method.
 func (m *MockStore) SavePeerStatus(ctx context.Context, accountID, peerID string, status peer.PeerStatus) error {
 	m.ctrl.T.Helper()
--- a/management/server/types/peer.go
+++ b/management/server/types/peer.go
@@ -12,6 +12,9 @@ type PeerSync struct {
 	WireGuardPubKey string
 	// Meta is the system information passed by peer, must be always present
 	Meta nbpeer.PeerSystemMeta
+	// RealIP is the peer's connection IP, used to refresh its geo location.
+	// May be nil when the request has no associated connection IP.
+	RealIP net.IP
 	// UpdateAccountPeers indicate updating account peers,
 	// which occurs when the peer's metadata is updated
 	UpdateAccountPeers bool
--- a/shared/signal/client/client.go
+++ b/shared/signal/client/client.go
@@ -33,7 +33,7 @@ type Client interface {
 	Receive(ctx context.Context, msgHandler func(msg *proto.Message) error) error
 	Ready() bool
 	IsHealthy() bool
-	WaitStreamConnected()
+	WaitStreamConnected(context.Context)
 	SendToStream(msg *proto.EncryptedMessage) error
 	Send(msg *proto.Message) error
 	SetOnReconnectedListener(func())
--- a/shared/signal/client/client_test.go
+++ b/shared/signal/client/client_test.go
@@ -65,7 +65,10 @@ var _ = Describe("GrpcClient", func() {
 						return
 					}
 				}()
-				clientA.WaitStreamConnected()
+				ctxA, cancelA := context.WithTimeout(context.Background(), 5*time.Second)
+				defer cancelA()
+				clientA.WaitStreamConnected(ctxA)
+				Expect(clientA.StreamConnected()).To(BeTrue())

 				// connect PeerB to Signal
 				keyB, _ := wgtypes.GenerateKey()
@@ -91,7 +94,10 @@ var _ = Describe("GrpcClient", func() {
 					}
 				}()

-				clientB.WaitStreamConnected()
+				ctxB, cancelB := context.WithTimeout(context.Background(), 5*time.Second)
+				defer cancelB()
+				clientB.WaitStreamConnected(ctxB)
+				Expect(clientB.StreamConnected()).To(BeTrue())

 				// PeerA initiates ping-pong
 				err := clientA.Send(&sigProto.Message{
@@ -129,8 +135,10 @@ var _ = Describe("GrpcClient", func() {
 						return
 					}
 				}()
-				client.WaitStreamConnected()
-				Expect(client).NotTo(BeNil())
+				ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+				defer cancel()
+				client.WaitStreamConnected(ctx)
+				Expect(client.StreamConnected()).To(BeTrue())
 			})
 		})

--- a/shared/signal/client/grpc.go
+++ b/shared/signal/client/grpc.go
@@ -246,15 +246,6 @@ func (c *GrpcClient) notifyStreamConnected() {
 	}
 }

-func (c *GrpcClient) getStreamStatusChan() <-chan struct{} {
-	c.mux.Lock()
-	defer c.mux.Unlock()
-	if c.connectedCh == nil {
-		c.connectedCh = make(chan struct{})
-	}
-	return c.connectedCh
-}
-
 func (c *GrpcClient) connect(ctx context.Context, key string) (proto.SignalExchange_ConnectStreamClient, error) {
 	c.stream = nil

@@ -310,14 +301,24 @@ func (c *GrpcClient) IsHealthy() bool {
 }

 // WaitStreamConnected waits until the client is connected to the Signal stream
-func (c *GrpcClient) WaitStreamConnected() {
-
+func (c *GrpcClient) WaitStreamConnected(ctx context.Context) {
+	// Check the status and obtain the wait channel atomically: otherwise
+	// notifyStreamConnected could flip the status and close/clear the channel
+	// between the check and the channel creation, leaving us waiting forever on
+	// a stale channel.
+	c.mux.Lock()
 	if c.status == StreamConnected {
+		c.mux.Unlock()
 		return
 	}
+	if c.connectedCh == nil {
+		c.connectedCh = make(chan struct{})
+	}
+	ch := c.connectedCh
+	c.mux.Unlock()

-	ch := c.getStreamStatusChan()
 	select {
+	case <-ctx.Done():
 	case <-c.ctx.Done():
 	case <-ch:
 	}
--- a/shared/signal/client/mock.go
+++ b/shared/signal/client/mock.go
@@ -55,7 +55,7 @@ func (sm *MockClient) Ready() bool {
 	return sm.ReadyFunc()
 }

-func (sm *MockClient) WaitStreamConnected() {
+func (sm *MockClient) WaitStreamConnected(context.Context) {
 	if sm.WaitStreamConnectedFunc == nil {
 		return
 	}
--- a/shared/signal/client/watchdog_test.go
+++ b/shared/signal/client/watchdog_test.go
@@ -65,7 +65,7 @@ func TestReceiveProbeRoundTrips(t *testing.T) {

 	streamReady := make(chan struct{})
 	go func() {
-		client.WaitStreamConnected()
+		client.WaitStreamConnected(ctx)
 		close(streamReady)
 	}()
 	select {
Author	SHA1	Message	Date
Viktor Liu	17b2044596	[client] Skip re-resolving cached management cache domains (#6518 )	2026-06-23 17:55:57 +02:00
Bethuel Mmbaga	07101c59ac	[management] Reschedule inactivity expiration when a peer disconnects (#6523 )	2026-06-23 17:44:32 +03:00
Riccardo Manfrin	51b6f6291b	Fixup debug config (#6514 )	2026-06-22 22:01:49 +02:00
Pascal Fischer	2ebf26006a	[management] empty file check in nmap on other posturechecks (#6511 )	2026-06-22 19:54:38 +02:00
Pascal Fischer	211a26019a	[management] validate meta change against posture checks (#6510 )	2026-06-22 19:42:04 +02:00
Pascal Fischer	6c26178ad5	[management] do not use meta diff for login (#6502 )	2026-06-22 17:36:52 +02:00
Bethuel Mmbaga	af3b7e4497	[misc] Add enterprise getting-started and migrate script (#6501 )	2026-06-22 16:58:45 +03:00
Zoltan Papp	e84f6527f7	[client] fix WaitStreamConnected test call after ctx signature change (#6503 ) watchdog_test.go called WaitStreamConnected() without the context.Context argument added in #6443, breaking the signal client test build.	2026-06-22 15:53:11 +02:00
Zoltan Papp	ac9529ea8c	[client] Fix engine lifecycle race (#6443 ) * [client] always clean up on Engine.Start failure via defer The rosenpass init paths (NewManager/Run) returned without calling e.close(), leaking the WireGuard interface and other partially initialized state on failure. Per-branch cleanup was easy to miss when adding new early returns. Convert Start to a named error return and tear down via a single defer that calls e.close() whenever err != nil, removing the scattered per-branch close() calls (including the redundant one in initFirewall). * [client] make Engine single-use and guard against double Start Create the run context once in NewEngine instead of in Start. This keeps e.cancel valid for the engine's whole lifetime, so Stop can cancel a Start that is blocked waiting on the network while holding syncMsgMux: Stop now cancels before taking the lock, unblocking that Start so it can release the mutex. Reject re-entry into Start: a non-nil wgInterface means a prior Start already ran (ErrEngineAlreadyStarted), and a cancelled run context means the engine was stopped (ErrEngineAlreadyStopped). Both checks run before the cleanup defer so a duplicate call cannot tear down the running engine's state. * [client] let engine context unblock WaitStreamConnected WaitStreamConnected only watched the signal client's own context, which derives from the parent engineCtx rather than the engine's run context. A Start blocked here (signal stream not yet up) could therefore not be released by Engine.Stop, since Stop only cancels the engine's run context. Pass a context into WaitStreamConnected and select on it too, and have the engine pass e.ctx, so Stop cancelling e.ctx unblocks a parked Start. Update the Client interface, the mock, and callers accordingly. 
* [client] fix Start/Stop race by making the run loop own engine shutdown ConnectClient.Stop stopped the engine directly while the run loop's backoff cycle could still be starting an engine, so Engine.close raced Engine.Start (e.g. firewall setup reading wgInterface while close nils it). embed.Client.Start's rollback only avoided a deadlock by cancelling before Stop; the race itself remained and was caught by -race. Make the run loop the sole owner of engine shutdown: derive the run context in NewConnectClient, and have Stop cancel it and wait for the loop to exit (skipping the wait when the loop never ran) instead of calling engine.Stop. The loop now always stops the engine on its way out, dropping the unsynchronised wgInterface check it used to guard that call. Self-calls from within the loop use runCancel to avoid waiting on themselves. embed keeps a defensive pre-Stop cancel(); the daemon's cleanupConnection gets a TODO to adopt Stop() rather than stopping the engine in parallel. * [client] init context state in engine tests Engine tests built the engine context with context.WithCancel( context.Background()), omitting CtxInitState. Now that the run context is created in the constructor, the wgIfaceMonitor goroutine can reach triggerClientRestart during teardown, which calls CtxGetState and panics on the missing state. Real entry points (up, embed, service) always CtxInitState; only the tests skipped it. * [client] interrupt connect backoff on context cancel The run loop retried with a raw ExponentialBackOff, so a backoff sleep ignored context cancellation. Now that ConnectClient.Stop waits for the run loop to exit, a cancel landing during a sleep would block Stop for the full interval (up to MaxInterval). Wrap the backoff with the run context so Retry returns promptly on cancel; the retry budget itself (MaxElapsedTime) is unchanged. 
* [client] bound WaitStreamConnected in signal client tests The tests waited on WaitStreamConnected with context.Background() and the client's own context was also Background, so a stream that never connects would hang until the suite timeout. Pass a 5s timeout context and assert StreamConnected afterwards so the tests fail fast with a clear reason. * [client] fix WaitStreamConnected stale-channel race The StreamConnected check and the wait-channel creation took the mutex separately, so notifyStreamConnected could set the status and close/clear connectedCh in between: the waiter then created a fresh channel nobody would ever close and blocked forever. Also, the status read was unlocked while notify wrote it under the mutex (a data race). Do the check and the channel fetch in one locked section; drop the now-unused getStreamStatusChan helper. Pre-existing bug, not introduced by this branch. * [client] abort Start if context cancelled while waiting for signal stream receiveSignalEvents blocks in WaitStreamConnected until the signal stream connects or the context is cancelled. If Stop cancelled e.ctx while Start was parked there, Start kept going: it started the remaining subsystems on a cancelled context and marked a shutting-down engine as started. Return the context error from receiveSignalEvents and propagate it from Start, so the deferred cleanup runs and the cancellation reaches the caller. * [client] clean up all started components on Start failure Start's failure defer only called close(), which covers the wg interface, firewall, rosenpass and port forwarding but leaves connMgr, srWatcher, route/DNS/flow/state managers and the monitor goroutines running. A late failure (e.g. the context-cancelled check after the signal stream) thus leaked them. Extract Stop's locked teardown into stopLocked (caller holds syncMsgMux, does not wait on shutdownWg) and call it from both Stop and Start's defer. 
The defer also cancels the run context first so goroutines started before the failure unwind. Teardown order is unchanged.	2026-06-22 13:52:57 +02:00
Zoltan Papp	f736ef9647	[client/ios] Add Auth.Stop() to cancel an in-progress interactive login (#6486 ) The iOS PKCE login runs in the main-app process, decoupled from the network extension (the extension's client context is torn down on login-required, which would otherwise kill the WaitToken goroutine before the OAuth callback arrives). Because it is decoupled, nothing aborted the flow when the user dismissed the browser without logging in: WaitToken kept its loopback HTTP server bound to the redirect port until the flow expired, so the next connect stalled trying to bind the same port. Make the Auth context cancellable and add Auth.Stop(), which cancels it. Cancelling unblocks WaitToken, whose deferred server.Shutdown frees the port immediately. This mirrors how Android's stopEngine() aborts login via the engine context. NewAuthWithConfig now also derives a cancellable context; its only iOS caller uses LoginSync (no interactive server), so behaviour is unchanged there.	2026-06-22 13:27:21 +02:00
Maycon Santos	cf58bf1ba9	[misc] Add TARGETPLATFORM build argument to Docker build commands (#6499 )	2026-06-22 12:43:19 +02:00
Viktor Liu	522b8ed969	[client] Surface DNS forwarder upstream failures via Extended DNS Errors (#6441 )	2026-06-22 12:41:33 +02:00
dependabot[bot]	c9e99659ea	[misc] Bump the actions group across 1 directory with 9 updates (#6451 ) Bumps the actions group with 9 updates in the / directory: \| Package \| From \| To \| \| --- \| --- \| --- \| \| [actions/checkout](https://github.com/actions/checkout) \| `6.0.2` \| `7.0.0` \| \| [actions/setup-go](https://github.com/actions/setup-go) \| `6.3.0` \| `6.4.0` \| \| [codecov/codecov-action](https://github.com/codecov/codecov-action) \| `6.0.1` \| `7.0.0` \| \| [vmactions/freebsd-vm](https://github.com/vmactions/freebsd-vm) \| `1.4.5` \| `1.4.8` \| \| [actions/setup-java](https://github.com/actions/setup-java) \| `5.2.0` \| `5.3.0` \| \| [docker/setup-qemu-action](https://github.com/docker/setup-qemu-action) \| `4.0.0` \| `4.1.0` \| \| [docker/setup-buildx-action](https://github.com/docker/setup-buildx-action) \| `4.0.0` \| `4.1.0` \| \| [goreleaser/goreleaser-action](https://github.com/goreleaser/goreleaser-action) \| `7.2.0` \| `7.2.2` \| \| [actions/download-artifact](https://github.com/actions/download-artifact) \| `8.0.0` \| `8.0.1` \| Updates `actions/checkout` from 6.0.2 to 7.0.0 - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](`de0fac2e45...9c091bb21b`) Updates `actions/setup-go` from 6.3.0 to 6.4.0 - [Release notes](https://github.com/actions/setup-go/releases) - [Commits](`4b73464bb3...4a3601121d`) Updates `codecov/codecov-action` from 6.0.1 to 7.0.0 - [Release notes](https://github.com/codecov/codecov-action/releases) - [Changelog](https://github.com/codecov/codecov-action/blob/main/CHANGELOG.md) - [Commits](`e79a6962e0...fb8b3582c8`) Updates `vmactions/freebsd-vm` from 1.4.5 to 1.4.8 - [Release notes](https://github.com/vmactions/freebsd-vm/releases) - [Commits](`d1e6581156...b84ab5559b`) Updates `actions/setup-java` from 5.2.0 to 5.3.0 - [Release notes](https://github.com/actions/setup-java/releases) - [Commits](`be666c2fcd...ad2b38190b`) 
Updates `docker/setup-qemu-action` from 4.0.0 to 4.1.0 - [Release notes](https://github.com/docker/setup-qemu-action/releases) - [Commits](`ce360397dd...06116385d9`) Updates `docker/setup-buildx-action` from 4.0.0 to 4.1.0 - [Release notes](https://github.com/docker/setup-buildx-action/releases) - [Commits](`4d04d5d948...d7f5e7f509`) Updates `goreleaser/goreleaser-action` from 7.2.0 to 7.2.2 - [Release notes](https://github.com/goreleaser/goreleaser-action/releases) - [Commits](`4c6ab561ad...5daf1e915a`) Updates `actions/download-artifact` from 8.0.0 to 8.0.1 - [Release notes](https://github.com/actions/download-artifact/releases) - [Commits](`70fc10c6e5...3e5f45b2cf`) --- updated-dependencies: - dependency-name: actions/checkout dependency-version: 6.0.3 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: actions - dependency-name: actions/download-artifact dependency-version: 8.0.1 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: actions - dependency-name: actions/setup-go dependency-version: 6.4.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: actions - dependency-name: actions/setup-java dependency-version: 5.3.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: actions - dependency-name: codecov/codecov-action dependency-version: 7.0.0 dependency-type: direct:production update-type: version-update:semver-major dependency-group: actions - dependency-name: docker/setup-buildx-action dependency-version: 4.1.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: actions - dependency-name: docker/setup-qemu-action dependency-version: 4.1.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: actions - dependency-name: goreleaser/goreleaser-action dependency-version: 7.2.2 dependency-type: direct:production 
update-type: version-update:semver-patch dependency-group: actions - dependency-name: vmactions/freebsd-vm dependency-version: 1.4.6 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: actions ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2026-06-22 09:43:33 +02:00
Viktor Liu	58c79f5878	[client] Fix DNS custom zone teardown: handler leak and external CNAME resolution (#6445 )	2026-06-19 17:33:09 +02:00
Viktor Liu	15a0504fb1	[client] Treat answering upstreams as reachable and widen DNS health grace window (#6453 )	2026-06-19 17:32:49 +02:00