Merge pull request #204 from LaurenceJJones/optimize-subnet-lookup-bart

perf(netstack2): optimize subnet rule matching with BART
This commit is contained in:
Owen Schwartz
2026-03-02 18:10:01 -08:00
committed by GitHub
4 changed files with 206 additions and 109 deletions

1
go.mod
View File

@@ -4,6 +4,7 @@ go 1.25
require (
github.com/docker/docker v28.5.2+incompatible
github.com/gaissmai/bart v0.26.0
github.com/gorilla/websocket v1.5.3
github.com/prometheus/client_golang v1.23.2
github.com/vishvananda/netlink v1.3.1

2
go.sum
View File

@@ -26,6 +26,8 @@ github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw
github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/gaissmai/bart v0.26.0 h1:xOZ57E9hJLBiQaSyeZa9wgWhGuzfGACgqp4BE77OkO0=
github.com/gaissmai/bart v0.26.0/go.mod h1:GREWQfTLRWz/c5FTOsIw+KkscuFkIV5t8Rp7Nd1Td5c=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=

View File

@@ -48,115 +48,6 @@ type SubnetRule struct {
PortRanges []PortRange // empty slice means all ports allowed
}
// ruleKey is the map key that identifies a rule by its (source, destination)
// prefix pair. Both fields hold the netip.Prefix.String() form of the prefix
// exactly as it was passed to AddSubnet/RemoveSubnet, so key lookups are exact
// string matches on that pair.
type ruleKey struct {
sourcePrefix string
destPrefix string
}
// SubnetLookup stores subnet rules keyed by their (source, destination) prefix pair.
// AddSubnet and RemoveSubnet are single map operations on that key. Match is
// not a keyed lookup: it walks every stored rule, because it has to test prefix
// containment rather than key equality.
type SubnetLookup struct {
mu sync.RWMutex // guards rules
rules map[ruleKey]*SubnetRule // rules indexed by prefix pair (used for add/overwrite/remove)
}
// NewSubnetLookup returns an empty SubnetLookup whose rule map is already
// allocated, so rules can be added straight away.
func NewSubnetLookup() *SubnetLookup {
	table := new(SubnetLookup)
	table.rules = map[ruleKey]*SubnetRule{}
	return table
}
// AddSubnet stores a rule for the given source/destination prefix pair,
// replacing any rule previously stored under the same pair.
//
//   - portRanges: nil or empty means every port is allowed.
//   - rewriteTo: either an IP/CIDR (e.g. "192.168.1.1/32") or a domain name
//     (e.g. "example.com"); it is stored on the rule unchanged.
//   - disableIcmp: stored on the rule; Match consults it for ICMP packets.
func (sl *SubnetLookup) AddSubnet(sourcePrefix, destPrefix netip.Prefix, rewriteTo string, portRanges []PortRange, disableIcmp bool) {
	// Build the key and the rule before taking the lock; neither touches
	// shared state.
	slot := ruleKey{
		destPrefix:   destPrefix.String(),
		sourcePrefix: sourcePrefix.String(),
	}
	entry := &SubnetRule{
		SourcePrefix: sourcePrefix,
		DestPrefix:   destPrefix,
		DisableIcmp:  disableIcmp,
		RewriteTo:    rewriteTo,
		PortRanges:   portRanges,
	}

	sl.mu.Lock()
	defer sl.mu.Unlock()
	sl.rules[slot] = entry
}
// RemoveSubnet drops the rule stored under the given source/destination prefix
// pair. It is a no-op when no rule is stored under that exact pair.
func (sl *SubnetLookup) RemoveSubnet(sourcePrefix, destPrefix netip.Prefix) {
	target := ruleKey{
		destPrefix:   destPrefix.String(),
		sourcePrefix: sourcePrefix.String(),
	}

	sl.mu.Lock()
	defer sl.mu.Unlock()
	delete(sl.rules, target)
}
// Match looks for a rule that admits a packet and returns it, or nil if there
// is none.
//
// A rule admits the packet when:
//   - srcIP lies inside the rule's source prefix,
//   - dstIP lies inside the rule's destination prefix, and
//   - the rule has no port ranges, or port falls inside a range whose protocol
//     is empty (TCP and UDP both allowed) or equals the packet's protocol.
//
// If a rule contains both addresses, has DisableIcmp set, and proto is ICMPv4
// or ICMPv6, Match returns nil immediately without looking at further rules.
//
// Every stored rule is examined in map iteration order, because containment
// cannot be answered by a keyed lookup.
func (sl *SubnetLookup) Match(srcIP, dstIP netip.Addr, port uint16, proto tcpip.TransportProtocolNumber) *SubnetRule {
	sl.mu.RLock()
	defer sl.mu.RUnlock()

	isICMP := proto == header.ICMPv4ProtocolNumber || proto == header.ICMPv6ProtocolNumber

	for _, candidate := range sl.rules {
		// Skip rules whose prefixes do not cover this address pair.
		if !(candidate.SourcePrefix.Contains(srcIP) && candidate.DestPrefix.Contains(dstIP)) {
			continue
		}

		// ICMP is switched off for this subnet: reject outright.
		if candidate.DisableIcmp && isICMP {
			return nil
		}

		// No port restrictions means everything is allowed.
		if len(candidate.PortRanges) == 0 {
			return candidate
		}

		for _, pr := range candidate.PortRanges {
			if port < pr.Min || port > pr.Max {
				continue
			}
			switch {
			case pr.Protocol == "":
				// An empty protocol allows both TCP and UDP.
				return candidate
			case pr.Protocol == "tcp" && proto == header.TCPProtocolNumber:
				return candidate
			case pr.Protocol == "udp" && proto == header.UDPProtocolNumber:
				return candidate
			}
			// Port is in range but the protocol differs; try the next range.
		}
	}

	return nil
}
// connKey uniquely identifies a connection for NAT tracking
type connKey struct {
srcIP string

203
netstack2/subnet_lookup.go Normal file
View File

@@ -0,0 +1,203 @@
package netstack2
import (
"net/netip"
"sync"
"github.com/gaissmai/bart"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/header"
)
// SubnetLookup provides IP subnet and port matching backed by BART
// (Balanced Routing Table, github.com/gaissmai/bart).
// Match uses the BART Table's Supernets() to enumerate only the stored prefixes
// that cover a given address, instead of iterating over every rule.
// NOTE(review): lookup cost is determined by the BART implementation; confirm
// the exact complexity against the library documentation before quoting it.
//
// Architecture:
// - Two-level BART structure for matching both source AND destination prefixes
// - Level 1: Source prefix -> Level 2 (destination prefix -> rules)
// - This reduces search space: only check destination prefixes for matching source prefixes
type SubnetLookup struct {
// mu guards sourceTrie and every destTrie reachable from it.
mu sync.RWMutex
// Two-level BART structure:
// Level 1: Source prefix -> Level 2 (destination prefix -> rules)
// This allows us to first match source prefix, then only check destination prefixes
// for matching source prefixes, reducing the search space significantly.
// Keys are the masked (canonical) form of the source prefix.
sourceTrie *bart.Table[*destTrie]
}
// destTrie is the second lookup level: the destination prefixes (and their
// rules) registered under one canonical source prefix.
type destTrie struct {
// trie maps a masked destination prefix to its rules. AddSubnet always stores a
// single-element slice, so a (source, destination) pair holds at most one rule.
trie *bart.Table[[]*SubnetRule]
// rules is a flat list of all rules for this source prefix. In the code visible
// here it is only kept in sync by AddSubnet/RemoveSubnet; Match does not read it.
rules []*SubnetRule
}
// NewSubnetLookup returns an empty SubnetLookup with its source-prefix BART
// table allocated and ready to accept rules.
func NewSubnetLookup() *SubnetLookup {
	lookup := new(SubnetLookup)
	lookup.sourceTrie = new(bart.Table[*destTrie])
	return lookup
}
// prefixEqual reports whether a and b denote the same network once host bits
// are cleared, e.g. 10.0.0.5/24 and 10.0.0.0/24 compare equal.
// Prefixes with different lengths or different address families are never equal.
func prefixEqual(a, b netip.Prefix) bool {
	left := a.Masked()
	right := b.Masked()
	if left.Bits() != right.Bits() {
		return false
	}
	return left.Addr() == right.Addr()
}
// AddSubnet registers a rule for traffic from sourcePrefix to destPrefix.
//
// Parameters:
//   - sourcePrefix, destPrefix: the networks the rule applies to. They are
//     indexed in masked (canonical) form, so 10.0.0.5/24 and 10.0.0.0/24 address
//     the same rule. The rule itself keeps the prefixes as passed in.
//   - rewriteTo: either an IP/CIDR (e.g. "192.168.1.1/32") or a domain name
//     (e.g. "example.com"); stored on the rule unchanged.
//   - portRanges: nil or empty means all ports are allowed for this subnet.
//   - disableIcmp: when set, Match returns nil for ICMPv4/ICMPv6 packets that
//     hit this rule.
//
// Adding a rule for a (source, destination) pair that already exists replaces
// the previous rule. A call with an invalid source or destination prefix is
// ignored: such a rule could never match, and indexing it would only leave
// bookkeeping entries that the tries do not contain.
func (sl *SubnetLookup) AddSubnet(sourcePrefix, destPrefix netip.Prefix, rewriteTo string, portRanges []PortRange, disableIcmp bool) {
	if !sourcePrefix.IsValid() || !destPrefix.IsValid() {
		return
	}

	sl.mu.Lock()
	defer sl.mu.Unlock()

	rule := &SubnetRule{
		SourcePrefix: sourcePrefix,
		DestPrefix:   destPrefix,
		DisableIcmp:  disableIcmp,
		RewriteTo:    rewriteTo,
		PortRanges:   portRanges,
	}

	// Canonicalize both prefixes so the bookkeeping below is keyed on the
	// masked form, independent of any host bits the caller left set.
	canonicalSourcePrefix := sourcePrefix.Masked()
	canonicalDestPrefix := destPrefix.Masked()

	// Get or create the destination trie for this source prefix.
	destTriePtr, exists := sl.sourceTrie.Get(canonicalSourcePrefix)
	if !exists {
		destTriePtr = &destTrie{trie: &bart.Table[[]*SubnetRule]{}}
		sl.sourceTrie.Insert(canonicalSourcePrefix, destTriePtr)
	}

	// Store a single-element slice so that re-adding the same pair overwrites
	// the previous rule instead of accumulating duplicates.
	destTriePtr.trie.Insert(canonicalDestPrefix, []*SubnetRule{rule})

	// Rebuild the flat rule list without the rule being replaced. Every rule in
	// this destTrie was added under the same canonical source prefix, so only
	// the destination prefix needs comparing.
	newRules := make([]*SubnetRule, 0, len(destTriePtr.rules)+1)
	for _, r := range destTriePtr.rules {
		if !prefixEqual(r.DestPrefix, canonicalDestPrefix) {
			newRules = append(newRules, r)
		}
	}
	destTriePtr.rules = append(newRules, rule)
}
// RemoveSubnet deletes the rule registered for the given source/destination
// prefix pair. Both prefixes are masked first, so a rule is found even if it
// was added (or is being removed) with host bits set. Nothing happens when no
// rules exist for the source prefix.
func (sl *SubnetLookup) RemoveSubnet(sourcePrefix, destPrefix netip.Prefix) {
	sl.mu.Lock()
	defer sl.mu.Unlock()

	src := sourcePrefix.Masked()
	dst := destPrefix.Masked()

	bucket, found := sl.sourceTrie.Get(src)
	if !found {
		return
	}

	// Drop the destination entry from the second-level trie.
	bucket.trie.Delete(dst)

	// Keep the flat rule list consistent with the trie: retain everything
	// except the rule whose canonical prefixes equal the pair being removed.
	kept := make([]*SubnetRule, 0, len(bucket.rules))
	for _, candidate := range bucket.rules {
		if prefixEqual(candidate.DestPrefix, dst) && prefixEqual(candidate.SourcePrefix, src) {
			continue
		}
		kept = append(kept, candidate)
	}
	bucket.rules = kept

	// Once the last destination is gone, remove the now-empty per-source trie.
	// The trie's Size() decides this, not the rule list.
	if bucket.trie.Size() == 0 {
		sl.sourceTrie.Delete(src)
	}
}
// Match looks for a rule that admits a packet and returns it, or nil if there
// is none.
//
// A rule admits the packet when:
//   - srcIP lies inside the rule's source prefix,
//   - dstIP lies inside the rule's destination prefix, and
//   - the rule has no port ranges, or port falls inside a range whose protocol
//     is empty (TCP and UDP both allowed) or equals the packet's protocol.
//
// proto is compared against header.TCPProtocolNumber, header.UDPProtocolNumber,
// header.ICMPv4ProtocolNumber and header.ICMPv6ProtocolNumber. If a covering
// rule has DisableIcmp set and proto is ICMPv4 or ICMPv6, Match returns nil
// immediately without looking at further rules.
//
// Only prefixes that cover the addresses are visited: BART's Supernets() is
// queried first on the source table and then, per hit, on that source's
// destination table. Rules are tried in the order Supernets() yields them.
func (sl *SubnetLookup) Match(srcIP, dstIP netip.Addr, port uint16, proto tcpip.TransportProtocolNumber) *SubnetRule {
	sl.mu.RLock()
	defer sl.mu.RUnlock()

	// Express each address as a host prefix (/32 or /128) so that Supernets()
	// returns exactly the stored prefixes that contain it.
	srcHost := netip.PrefixFrom(srcIP, srcIP.BitLen())
	dstHost := netip.PrefixFrom(dstIP, dstIP.BitLen())

	isICMP := proto == header.ICMPv4ProtocolNumber || proto == header.ICMPv6ProtocolNumber

	for _, perSource := range sl.sourceTrie.Supernets(srcHost) {
		if perSource == nil {
			continue
		}

		for _, candidates := range perSource.trie.Supernets(dstHost) {
			// A nil slice simply yields no iterations.
			for _, rule := range candidates {
				// ICMP is switched off for this subnet: reject outright.
				if rule.DisableIcmp && isICMP {
					return nil
				}

				// No port restrictions means everything is allowed.
				if len(rule.PortRanges) == 0 {
					return rule
				}

				for _, pr := range rule.PortRanges {
					if port < pr.Min || port > pr.Max {
						continue
					}
					switch {
					case pr.Protocol == "":
						// An empty protocol allows both TCP and UDP.
						return rule
					case pr.Protocol == "tcp" && proto == header.TCPProtocolNumber:
						return rule
					case pr.Protocol == "udp" && proto == header.UDPProtocolNumber:
						return rule
					}
					// Port is in range but the protocol differs; try the next range.
				}
			}
		}
	}

	return nil
}