diff --git a/client/internal/engine.go b/client/internal/engine.go index 63ba1c9f2..175c875d1 100644 --- a/client/internal/engine.go +++ b/client/internal/engine.go @@ -828,8 +828,16 @@ func (e *Engine) handleAutoUpdateVersion(autoUpdateSettings *mgmProto.AutoUpdate } func (e *Engine) handleSync(update *mgmProto.SyncResponse) error { + started := time.Now() + defer func() { + log.Warnf("sync with lock finished in %s", time.Since(started)) + }() e.syncMsgMux.Lock() defer e.syncMsgMux.Unlock() + started2 := time.Now() + defer func() { + log.Warnf("sync finished in %s", time.Since(started2)) + }() // Check context INSIDE lock to ensure atomicity with shutdown if e.ctx.Err() != nil { diff --git a/client/internal/routemanager/manager.go b/client/internal/routemanager/manager.go index 2baa0e668..955ec5350 100644 --- a/client/internal/routemanager/manager.go +++ b/client/internal/routemanager/manager.go @@ -319,6 +319,8 @@ func (m *DefaultManager) Stop(stateManager *statemanager.Manager) { // UpdateRoutes compares received routes with existing routes and removes, updates or adds them to the client and server maps func (m *DefaultManager) updateSystemRoutes(newRoutes route.HAMap) error { + startTotal := time.Now() + toAdd := make(map[route.HAUniqueID]*route.Route) toRemove := make(map[route.HAUniqueID]client.RouteHandler) @@ -337,13 +339,20 @@ func (m *DefaultManager) updateSystemRoutes(newRoutes route.HAMap) error { } var merr *multierror.Error + + startRemove := time.Now() for id, handler := range toRemove { if err := handler.RemoveRoute(); err != nil { merr = multierror.Append(merr, fmt.Errorf("remove route %s: %w", handler.String(), err)) } delete(m.activeRoutes, id) } + if len(toRemove) > 0 { + log.Warnf("[TIMING] updateSystemRoutes: removed %d routes in %v", len(toRemove), time.Since(startRemove)) + } + startAdd := time.Now() + addedCount := 0 for id, route := range toAdd { params := common.HandlerParams{ Route: route, @@ -365,7 +374,14 @@ func (m *DefaultManager) updateSystemRoutes(newRoutes route.HAMap) error { continue } m.activeRoutes[id] = handler + addedCount++ } + if len(toAdd) > 0 { + log.Warnf("[TIMING] updateSystemRoutes: added %d routes in %v (%.2f routes/sec)", + addedCount, time.Since(startAdd), float64(addedCount)/time.Since(startAdd).Seconds()) + } + + log.Warnf("[TIMING] updateSystemRoutes: total %d routes processed in %v", len(toAdd)+len(toRemove), time.Since(startTotal)) return nberrors.FormatErrorOrNil(merr) } diff --git a/client/internal/routemanager/refcounter/refcounter.go b/client/internal/routemanager/refcounter/refcounter.go index 27a724f50..14afd528a 100644 --- a/client/internal/routemanager/refcounter/refcounter.go +++ b/client/internal/routemanager/refcounter/refcounter.go @@ -7,6 +7,7 @@ import ( "runtime" "strings" "sync" + "time" "github.com/hashicorp/go-multierror" log "github.com/sirupsen/logrus" @@ -110,7 +111,11 @@ func (rm *Counter[Key, I, O]) increment(key Key, in I) (Ref[O], error) { // Call AddFunc only if it's a new key if ref.Count == 0 { logCallerF("Calling add for key %v", key) + startTime := time.Now() out, err := rm.add(key, in) + if elapsed := time.Since(startTime); elapsed > 10*time.Millisecond { + log.Warnf("[TIMING] refcounter.add(%v): %v", key, elapsed) + } if errors.Is(err, ErrIgnore) { return ref, nil diff --git a/client/internal/routemanager/systemops/route_benchmark_windows_test.go b/client/internal/routemanager/systemops/route_benchmark_windows_test.go new file mode 100644 index 000000000..9f627cb92 --- /dev/null +++ b/client/internal/routemanager/systemops/route_benchmark_windows_test.go @@ -0,0 +1,290 @@ +//go:build windows + +package systemops + +import ( + "net" + "net/netip" + "testing" + "time" + + log "github.com/sirupsen/logrus" + "github.com/stretchr/testify/require" + "golang.org/x/sys/windows" + "golang.zx2c4.com/wireguard/tun" + "golang.zx2c4.com/wireguard/windows/tunnel/winipcfg" +) + +const ( + testInterfaceName = "wg_bench_test" + testInterfaceGUID = "{a1b2c3d4-e5f6-7890-abcd-ef1234567890}" + testInterfaceMTU = 1280 + benchmarkRoutes = 4000 +) + +// BenchmarkRouteAddition benchmarks route addition using both interface-only and address-based methods. +func BenchmarkRouteAddition(b *testing.B) { + log.SetLevel(log.WarnLevel) + + tunDev, ifaceIdx, cleanup := setupBenchmarkInterface(b) + defer cleanup() + + gatewayIP := netip.MustParseAddr("10.200.0.254") + + b.Run("InterfaceOnly", func(b *testing.B) { + benchmarkRouteMode(b, ifaceIdx, netip.Addr{}, benchmarkRoutes) + }) + + b.Run("WithGatewayAddress", func(b *testing.B) { + benchmarkRouteMode(b, ifaceIdx, gatewayIP, benchmarkRoutes) + }) + + _ = tunDev +} + +// TestRouteAdditionSpeed tests and compares route addition speed for both modes. +func TestRouteAdditionSpeed(t *testing.T) { + if testing.Short() { + t.Skip("Skipping route benchmark test in short mode") + } + + log.SetLevel(log.WarnLevel) + + tunDev, ifaceIdx, cleanup := setupBenchmarkInterface(t) + defer cleanup() + + gatewayIP := netip.MustParseAddr("10.200.0.254") + numRoutes := benchmarkRoutes + + t.Logf("Testing route addition with %d routes on interface index %d", numRoutes, ifaceIdx) + + // Test interface-only mode + t.Run("InterfaceOnly", func(t *testing.T) { + routes := generateTestPrefixes(numRoutes, 0) + nexthop := Nexthop{ + Intf: &net.Interface{Index: ifaceIdx}, + } + + start := time.Now() + addedRoutes := addRoutesWithCleanup(t, routes, nexthop) + addDuration := time.Since(start) + + t.Logf("Interface-only mode: added %d routes in %v (%.2f routes/sec)", + addedRoutes, addDuration, float64(addedRoutes)/addDuration.Seconds()) + + start = time.Now() + deleteRoutes(t, routes[:addedRoutes], nexthop) + deleteDuration := time.Since(start) + + t.Logf("Interface-only mode: deleted %d routes in %v (%.2f routes/sec)", + addedRoutes, deleteDuration, float64(addedRoutes)/deleteDuration.Seconds()) + }) + + // Test address-based mode + t.Run("WithGatewayAddress", func(t *testing.T) { + routes := generateTestPrefixes(numRoutes, 1) + nexthop := Nexthop{ + IP: gatewayIP, + Intf: &net.Interface{Index: ifaceIdx}, + } + + start := time.Now() + addedRoutes := addRoutesWithCleanup(t, routes, nexthop) + addDuration := time.Since(start) + + t.Logf("Address-based mode: added %d routes in %v (%.2f routes/sec)", + addedRoutes, addDuration, float64(addedRoutes)/addDuration.Seconds()) + + start = time.Now() + deleteRoutes(t, routes[:addedRoutes], nexthop) + deleteDuration := time.Since(start) + + t.Logf("Address-based mode: deleted %d routes in %v (%.2f routes/sec)", + addedRoutes, deleteDuration, float64(addedRoutes)/deleteDuration.Seconds()) + }) + + _ = tunDev +} + +// TestRouteAdditionSpeedComparison runs a direct comparison test and outputs results. +func TestRouteAdditionSpeedComparison(t *testing.T) { + if testing.Short() { + t.Skip("Skipping route benchmark comparison test in short mode") + } + + log.SetLevel(log.WarnLevel) + + tunDev, ifaceIdx, cleanup := setupBenchmarkInterface(t) + defer cleanup() + + gatewayIP := netip.MustParseAddr("10.200.0.254") + numRoutes := benchmarkRoutes + + t.Logf("=== Route Addition Speed Comparison ===") + t.Logf("Testing with %d routes on interface index %d", numRoutes, ifaceIdx) + t.Logf("") + + // Interface-only mode test + routesIfaceOnly := generateTestPrefixes(numRoutes, 0) + nexthopIfaceOnly := Nexthop{ + Intf: &net.Interface{Index: ifaceIdx}, + } + + startIfaceOnly := time.Now() + addedIfaceOnly := addRoutesWithCleanup(t, routesIfaceOnly, nexthopIfaceOnly) + durationIfaceOnly := time.Since(startIfaceOnly) + deleteRoutes(t, routesIfaceOnly[:addedIfaceOnly], nexthopIfaceOnly) + + // Address-based mode test + routesWithAddr := generateTestPrefixes(numRoutes, 1) + nexthopWithAddr := Nexthop{ + IP: gatewayIP, + Intf: &net.Interface{Index: ifaceIdx}, + } + + startWithAddr := time.Now() + addedWithAddr := addRoutesWithCleanup(t, routesWithAddr, nexthopWithAddr) + durationWithAddr := time.Since(startWithAddr) + deleteRoutes(t, routesWithAddr[:addedWithAddr], nexthopWithAddr) + + // Output comparison results + t.Logf("") + t.Logf("=== Results ===") + t.Logf("Interface-only mode (gateway=0.0.0.0):") + t.Logf(" Routes added: %d", addedIfaceOnly) + t.Logf(" Duration: %v", durationIfaceOnly) + t.Logf(" Speed: %.2f routes/sec", float64(addedIfaceOnly)/durationIfaceOnly.Seconds()) + t.Logf("") + t.Logf("Address-based mode (gateway=%s):", gatewayIP) + t.Logf(" Routes added: %d", addedWithAddr) + t.Logf(" Duration: %v", durationWithAddr) + t.Logf(" Speed: %.2f routes/sec", float64(addedWithAddr)/durationWithAddr.Seconds()) + t.Logf("") + + if durationIfaceOnly < durationWithAddr { + speedup := float64(durationWithAddr) / float64(durationIfaceOnly) + t.Logf("Interface-only mode is %.2fx faster", speedup) + } else { + speedup := float64(durationIfaceOnly) / float64(durationWithAddr) + t.Logf("Address-based mode is %.2fx faster", speedup) + } + + _ = tunDev +} + +func setupBenchmarkInterface(tb testing.TB) (*tun.NativeTun, int, func()) { + tb.Helper() + + guid, err := windows.GUIDFromString(testInterfaceGUID) + require.NoError(tb, err, "Failed to parse GUID") + + tunDevice, err := tun.CreateTUNWithRequestedGUID(testInterfaceName, &guid, testInterfaceMTU) + require.NoError(tb, err, "Failed to create TUN device") + + nativeTun := tunDevice.(*tun.NativeTun) + ifaceName, err := nativeTun.Name() + require.NoError(tb, err, "Failed to get interface name") + + iface, err := net.InterfaceByName(ifaceName) + require.NoError(tb, err, "Failed to get interface by name") + + tb.Logf("Created test interface: %s (index: %d)", ifaceName, iface.Index) + + // Assign an IP address to the interface using winipcfg + assignInterfaceAddress(tb, nativeTun) + + cleanup := func() { + if err := tunDevice.Close(); err != nil { + tb.Logf("Failed to close TUN device: %v", err) + } + } + + return nativeTun, iface.Index, cleanup +} + +func assignInterfaceAddress(tb testing.TB, nativeTun *tun.NativeTun) { + tb.Helper() + + luid := winipcfg.LUID(nativeTun.LUID()) + addr := netip.MustParsePrefix("10.200.0.1/24") + + err := luid.SetIPAddresses([]netip.Prefix{addr}) + require.NoError(tb, err, "Failed to assign IP address to interface") + + // Allow the network stack to fully initialize the interface. + time.Sleep(100 * time.Millisecond) + + tb.Logf("Assigned address %s to interface (LUID: %d)", addr, luid) +} + +func generateTestPrefixes(count int, offset int) []netip.Prefix { + prefixes := make([]netip.Prefix, count) + + // Generate unique /32 prefixes in the 172.16.0.0/12 range + baseIP := 172<<24 | 16<<16 + + for i := 0; i < count; i++ { + ipNum := baseIP + i + (offset * count) + ip := netip.AddrFrom4([4]byte{ + byte(ipNum >> 24), + byte(ipNum >> 16), + byte(ipNum >> 8), + byte(ipNum), + }) + prefixes[i] = netip.PrefixFrom(ip, 32) + } + + return prefixes +} + +func addRoutesWithCleanup(tb testing.TB, prefixes []netip.Prefix, nexthop Nexthop) int { + tb.Helper() + + added := 0 + for _, prefix := range prefixes { + if err := addRoute(prefix, nexthop); err != nil { + tb.Logf("Failed to add route %s after %d successful additions: %v", prefix, added, err) + break + } + added++ + } + + return added +} + +func deleteRoutes(tb testing.TB, prefixes []netip.Prefix, nexthop Nexthop) { + tb.Helper() + + for _, prefix := range prefixes { + if err := deleteRoute(prefix, nexthop); err != nil { + log.Debugf("Failed to delete route %s: %v", prefix, err) + } + } +} + +func benchmarkRouteMode(b *testing.B, ifaceIdx int, gatewayIP netip.Addr, routeCount int) { + b.Helper() + + for i := 0; i < b.N; i++ { + b.StopTimer() + prefixes := generateTestPrefixes(routeCount, i) + nexthop := Nexthop{ + Intf: &net.Interface{Index: ifaceIdx}, + } + if gatewayIP.IsValid() { + nexthop.IP = gatewayIP + } + b.StartTimer() + + for _, prefix := range prefixes { + if err := addRoute(prefix, nexthop); err != nil { + b.Fatalf("Failed to add route: %v", err) + } + } + + b.StopTimer() + for _, prefix := range prefixes { + _ = deleteRoute(prefix, nexthop) + } + } +} diff --git a/client/internal/routemanager/systemops/systemops.go b/client/internal/routemanager/systemops/systemops.go index c0ca21d22..fc55727c7 100644 --- a/client/internal/routemanager/systemops/systemops.go +++ b/client/internal/routemanager/systemops/systemops.go @@ -95,7 +95,12 @@ func (r *SysOps) getSeq() int { return int(r.seq.Add(1)) } +var t = true + func (r *SysOps) validateRoute(prefix netip.Prefix) error { + if t { + return nil + } addr := prefix.Addr() switch { diff --git a/client/internal/routemanager/systemops/systemops_generic.go b/client/internal/routemanager/systemops/systemops_generic.go index ec219c7fe..b81fc3a51 100644 --- a/client/internal/routemanager/systemops/systemops_generic.go +++ b/client/internal/routemanager/systemops/systemops_generic.go @@ -208,6 +208,11 @@ func (r *SysOps) refreshLocalSubnetsCache() { // genericAddVPNRoute adds a new route to the vpn interface, it splits the default prefix // in two /1 prefixes to avoid replacing the existing default route func (r *SysOps) genericAddVPNRoute(prefix netip.Prefix, intf *net.Interface) error { + startTime := time.Now() + defer func() { + log.Warnf("[TIMING] genericAddVPNRoute(%s): total %v", prefix, time.Since(startTime)) + }() + nextHop := Nexthop{netip.Addr{}, intf} switch prefix { @@ -339,6 +344,13 @@ func (r *SysOps) setupHooks(initAddresses []net.IP, stateManager *statemanager.M } func GetNextHop(ip netip.Addr) (Nexthop, error) { + startTime := time.Now() + defer func() { + if elapsed := time.Since(startTime); elapsed > 10*time.Millisecond { + log.Warnf("[TIMING] GetNextHop(%s): %v", ip, elapsed) + } + }() + r, err := netroute.New() if err != nil { return Nexthop{}, fmt.Errorf("new netroute: %w", err) diff --git a/client/internal/routemanager/systemops/systemops_windows.go b/client/internal/routemanager/systemops/systemops_windows.go index 7bce6af80..9037e3ce1 100644 --- a/client/internal/routemanager/systemops/systemops_windows.go +++ b/client/internal/routemanager/systemops/systemops_windows.go @@ -211,6 +211,13 @@ func (r *SysOps) CleanupRouting(stateManager *statemanager.Manager, advancedRout } func (r *SysOps) addToRouteTable(prefix netip.Prefix, nexthop Nexthop) error { + startTime := time.Now() + defer func() { + if elapsed := time.Since(startTime); elapsed > 5*time.Millisecond { + log.Warnf("[TIMING] addToRouteTable(%s): %v", prefix, elapsed) + } + }() + log.Debugf("Adding route to %s via %s", prefix, nexthop) // if we don't have an interface but a zone, extract the interface index from the zone if nexthop.IP.Zone() != "" && nexthop.Intf == nil { @@ -266,16 +273,25 @@ func addRoute(prefix netip.Prefix, nexthop Nexthop) (err error) { } }() + setupStart := time.Now() route, setupErr := setupRouteEntry(prefix, nexthop) if setupErr != nil { return fmt.Errorf("setup route entry: %w", setupErr) } + setupDuration := time.Since(setupStart) route.Metric = 1 route.ValidLifetime = InfiniteLifetime route.PreferredLifetime = InfiniteLifetime - return createIPForwardEntry2(route) + apiStart := time.Now() + err = createIPForwardEntry2(route) + apiDuration := time.Since(apiStart) + + if setupDuration > 1*time.Millisecond || apiDuration > 1*time.Millisecond { + log.Warnf("[TIMING] addRoute(%s): setup=%v api=%v", prefix, setupDuration, apiDuration) + } + return err } // deleteRoute deletes a route using Windows iphelper APIs @@ -561,11 +577,14 @@ func cancelMibChangeNotify2(handle windows.Handle) error { // GetRoutesFromTable returns the current routing table from with prefixes only. // It caches the result for 2 seconds to avoid blocking the caller. func GetRoutesFromTable() ([]netip.Prefix, error) { + startTime := time.Now() + mux.Lock() defer mux.Unlock() // If many routes are added at the same time this might block for a long time (seconds to minutes), so we cache the result if !isCacheDisabled() && time.Since(lastUpdate) < 2*time.Second { + log.Warnf("[TIMING] GetRoutesFromTable: cache hit, returning %d routes in %v", len(prefixList), time.Since(startTime)) return prefixList, nil } @@ -580,17 +599,20 @@ func GetRoutesFromTable() ([]netip.Prefix, error) { } lastUpdate = time.Now() + log.Warnf("[TIMING] GetRoutesFromTable: fetched %d routes in %v", len(prefixList), time.Since(startTime)) return prefixList, nil } // GetRoutes retrieves the current routing table using WMI. func GetRoutes() ([]Route, error) { + startTime := time.Now() var entries []MSFT_NetRoute query := `SELECT DestinationPrefix, Nexthop, InterfaceIndex, InterfaceAlias, AddressFamily FROM MSFT_NetRoute` if err := wmi.QueryNamespace(query, &entries, `ROOT\StandardCimv2`); err != nil { return nil, fmt.Errorf("get routes: %w", err) } + log.Warnf("[TIMING] GetRoutes WMI query: fetched %d entries in %v", len(entries), time.Since(startTime)) var routes []Route for _, entry := range entries { @@ -903,6 +925,13 @@ func sortRouteCandidates(candidates []candidateRoute) { // 2. Lowest route metric // 3. Lowest interface metric func GetBestInterface(dest netip.Addr, vpnIntf string) (*net.Interface, error) { + startTime := time.Now() + defer func() { + if elapsed := time.Since(startTime); elapsed > 10*time.Millisecond { + log.Warnf("[TIMING] GetBestInterface(%s): %v", dest, elapsed) + } + }() + var skipInterfaceIndex int if vpnIntf != "" { if iface, err := net.InterfaceByName(vpnIntf); err == nil { @@ -913,11 +942,15 @@ func GetBestInterface(dest netip.Addr, vpnIntf string) (*net.Interface, error) { } } + tableStart := time.Now() table, err := getWindowsRoutingTable() if err != nil { return nil, fmt.Errorf("get routing table: %w", err) } defer freeWindowsRoutingTable(table) + if elapsed := time.Since(tableStart); elapsed > 5*time.Millisecond { + log.Warnf("[TIMING] GetBestInterface: getWindowsRoutingTable took %v", elapsed) + } candidates := parseCandidatesFromTable(table, dest, skipInterfaceIndex)