fix(management): private-service validation + tunnel-IP lookup semantics

- Require an explicit port for L4 cluster targets. validateL4Target
  exempted TargetTypeCluster from the port check, but buildPathMappings
  serializes every L4 target via net.JoinHostPort(host, port) — port=0
  shipped a ":0" upstream. Cluster targets use the same Host/Port
  fields, so the same requirement applies.
- GetPeerByIP returns NotFound on a tunnel-IP miss instead of mapping
  every error to Internal. The proxy's ValidateTunnelPeer probes IPs
  that legitimately aren't in the roster; the miss is expected and now
  distinguishable from a real store failure.
- Thread ctx into getClusterCapability's gorm query so a cancelled
  request doesn't keep the store busy.

Tests updated for the L4-cluster port requirement and the GetPeerByIP
NotFound path.
This commit is contained in:
mlsmaycon
2026-05-26 21:59:22 +02:00
parent 6600f0d45f
commit af416c656b
4 changed files with 46 additions and 4 deletions

View File

@@ -932,7 +932,11 @@ func (s *Service) validateL4Target(target *Target) error {
if target.TargetId == "" {
return errors.New("target_id is required for L4 services")
}
if target.TargetType != TargetTypeCluster && target.Port == 0 {
// Cluster targets resolve their upstream host:port from the target's
// own Host/Port fields just like the other L4 types — buildPathMappings
// emits net.JoinHostPort(target.Host, target.Port) for every L4
// target, so allowing port=0 here would let ":0" reach the proxy.
if target.Port == 0 {
return errors.New("target port is required for L4 services")
}
switch target.TargetType {

View File

@@ -1176,7 +1176,12 @@ func TestValidate_HTTPClusterTarget_RequiresDirectUpstream(t *testing.T) {
assert.ErrorContains(t, rp.Validate(), "direct upstream disabled", "cluster target must reject direct_upstream=false")
}
func TestValidate_L4ClusterTarget(t *testing.T) {
// TestValidate_L4ClusterTarget_RequiresPort confirms that an L4 cluster
// target without an explicit port is rejected. buildPathMappings emits
// net.JoinHostPort(target.Host, target.Port) for every L4 target — so
// allowing port=0 would let the proxy ship ":0" upstreams. The port
// requirement is the same as every other L4 target type.
func TestValidate_L4ClusterTarget_RequiresPort(t *testing.T) {
rp := validProxy()
rp.Mode = ModeTCP
rp.ListenPort = 9000
@@ -1186,7 +1191,12 @@ func TestValidate_L4ClusterTarget(t *testing.T) {
Protocol: "tcp",
Enabled: true,
}}
require.NoError(t, rp.Validate(), "L4 cluster target must validate without an explicit port")
assert.ErrorContains(t, rp.Validate(), "port is required",
"L4 cluster target must require an explicit port like other L4 target types")
rp.Targets[0].Port = 5432
rp.Targets[0].Host = "db.lan"
require.NoError(t, rp.Validate(), "L4 cluster target with host:port must validate")
}
func TestService_Copy_RoundtripsPrivate(t *testing.T) {

View File

@@ -4734,7 +4734,13 @@ func (s *SqlStore) GetPeerByIP(ctx context.Context, lockStrength LockingStrength
result := tx.
Take(&peer, fmt.Sprintf("account_id = ? AND %s = ?", column), accountID, jsonValue)
if result.Error != nil {
// no logging here
// A tunnel-IP miss is an expected outcome (e.g. the proxy's
// ValidateTunnelPeer probing an address that isn't in the
// account roster); surface it as NotFound so callers can tell
// it apart from a real store failure.
if errors.Is(result.Error, gorm.ErrRecordNotFound) {
return nil, status.Errorf(status.NotFound, "peer with ip %s not found", ip.String())
}
return nil, status.Errorf(status.Internal, "failed to get peer from store")
}
@@ -5962,6 +5968,7 @@ func (s *SqlStore) getClusterCapability(ctx context.Context, clusterAddr, column
}
err := s.db.
WithContext(ctx).
Model(&proxy.Proxy{}).
Select("COUNT(CASE WHEN "+column+" IS NOT NULL THEN 1 END) > 0 AS has_capability, "+
"COALESCE(MAX(CASE WHEN "+column+" = true THEN 1 ELSE 0 END), 0) = 1 AS any_true").

View File

@@ -491,6 +491,27 @@ func Test_GetAccount(t *testing.T) {
})
}
// TestSqlStore_GetPeerByIP_NotFound pins the not-found semantics the
// proxy's ValidateTunnelPeer relies on: a tunnel-IP that isn't in the
// account roster must surface as a NotFound error (not a generic
// Internal) so callers can distinguish an expected miss from a real
// store failure. A known IP still resolves.
func TestSqlStore_GetPeerByIP_NotFound(t *testing.T) {
runTestForAllEngines(t, "../testdata/store.sql", func(t *testing.T, store Store) {
const accountID = "bf1c8084-ba50-4ce7-9439-34653001fc3b"
peer, err := store.GetPeerByIP(context.Background(), LockingStrengthNone, accountID, net.ParseIP("192.168.0.0"))
require.NoError(t, err, "known tunnel IP must resolve")
require.NotNil(t, peer)
_, err = store.GetPeerByIP(context.Background(), LockingStrengthNone, accountID, net.ParseIP("100.65.0.99"))
require.Error(t, err, "unknown tunnel IP must error")
parsedErr, ok := status.FromError(err)
require.True(t, ok, "error must be a status error")
require.Equal(t, status.NotFound, parsedErr.Type(), "tunnel-IP miss must be NotFound, not Internal")
})
}
func TestSqlStore_SavePeer(t *testing.T) {
store, cleanUp, err := NewTestStoreFromSQL(context.Background(), "../testdata/store.sql", t.TempDir())
t.Cleanup(cleanUp)