diff --git a/go.mod b/go.mod index a67f115a8c4df..11b16bb57ee5f 100644 --- a/go.mod +++ b/go.mod @@ -39,6 +39,7 @@ require ( github.com/google/nftables v0.2.0 github.com/google/uuid v1.3.0 github.com/goreleaser/nfpm v1.10.3 + github.com/hashicorp/golang-lru/v2 v2.0.7 github.com/hdevalence/ed25519consensus v0.1.0 github.com/iancoleman/strcase v0.2.0 github.com/illarion/gonotify v1.0.1 diff --git a/go.sum b/go.sum index 39d17277ec67b..f67bc5c10f039 100644 --- a/go.sum +++ b/go.sum @@ -572,6 +572,8 @@ github.com/hashicorp/go-version v1.6.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09 github.com/hashicorp/go.net v0.0.1/go.mod h1:hjKkEWcCURg++eb33jQU7oqQcI9XDCnUzHA0oac0k90= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= +github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= diff --git a/net/netmon/netmon_darwin.go b/net/netmon/netmon_darwin.go index cc630112523fa..e4460a42d28a4 100644 --- a/net/netmon/netmon_darwin.go +++ b/net/netmon/netmon_darwin.go @@ -137,6 +137,11 @@ func (m *darwinRouteMon) skipRouteMessage(msg *route.RouteMessage) bool { // dst = fe80::b476:66ff:fe30:c8f6%15 return true } + if msg.Type == unix.RTM_GET { + // Skip RTM_GET messages, which are used to query the routing table. + // See netns_darwin.go + return true + } return false } diff --git a/net/netns/netns_android.go b/net/netns/netns_android.go index 162e5c79a62fa..80853ebdd0539 100644 --- a/net/netns/netns_android.go +++ b/net/netns/netns_android.go @@ -54,6 +54,10 @@ func control(logger.Logf, *netmon.Monitor) func(network, address string, c sysca return controlC } +func ClearRouteCache() { + // There's no route cache to clear on Android. +} + // controlC marks c as necessary to dial in a separate network namespace. // // It's intentionally the same signature as net.Dialer.Control diff --git a/net/netns/netns_darwin.go b/net/netns/netns_darwin.go index 0bcd84a21c0a3..702795f45f749 100644 --- a/net/netns/netns_darwin.go +++ b/net/netns/netns_darwin.go @@ -13,13 +13,17 @@ import ( "net/netip" "os" "strings" + "sync" "syscall" + "time" + "github.com/hashicorp/golang-lru/v2/expirable" "golang.org/x/net/route" "golang.org/x/sys/unix" "tailscale.com/envknob" "tailscale.com/net/interfaces" "tailscale.com/net/netmon" + "tailscale.com/net/tsaddr" "tailscale.com/types/logger" ) @@ -33,60 +37,153 @@ var bindToInterfaceByRouteEnv = envknob.RegisterBool("TS_BIND_TO_INTERFACE_BY_RO var errInterfaceStateInvalid = errors.New("interface state invalid") -// controlLogf marks c as necessary to dial in a separate network namespace. -// -// It's intentionally the same signature as net.Dialer.Control -// and net.ListenConfig.Control. -func controlLogf(logf logger.Logf, netMon *netmon.Monitor, network, address string, c syscall.RawConn) error { - if isLocalhost(address) { - // Don't bind to an interface for localhost connections. - return nil +// routeCache caches the results of interfaceIndexFor calls to avoid +// spamming the AF_ROUTE socket. This is used for soft +// isolation mode where we do many route lookups. +var ( + routeCache *expirable.LRU[string, int] + routeCacheOnce sync.Once +) + +func getRouteCache() *expirable.LRU[string, int] { + routeCacheOnce.Do(func() { + routeCache = expirable.NewLRU[string, int](256, nil, 15*time.Second) + }) + return routeCache +} + +// ClearRouteCache clears the route cache. This should be called by the +// network monitor when a link changes occur. +func ClearRouteCache() { + // The route cache is only used in Coder soft isolation mode. + if coderSoftIsolation.Load() { + getRouteCache().Purge() } +} - if disableBindConnToInterface.Load() { - logf("netns_darwin: binding connection to interfaces disabled") +// isInterfaceCoderInterface can be swapped out in tests. +var isInterfaceCoderInterface func(int) bool = isInterfaceCoderInterfaceDefault + +func isInterfaceCoderInterfaceDefault(idx int) bool { + _, tsif, err := interfaces.Coder() + return err == nil && tsif != nil && tsif.Index == idx +} + +// controlLogf binds c to the default interface if it would otherwise +// be bound to the Coder interface. +func controlLogf(logf logger.Logf, netMon *netmon.Monitor, network, address string, c syscall.RawConn) error { + if !shouldBindToDefaultInterface(logf, netMon, address) { return nil } - idx, err := getInterfaceIndex(logf, netMon, address) + idx, err := getDefaultInterfaceIndex(logf, netMon) if err != nil { - // callee logged return nil } return bindConnToInterface(c, network, address, idx, logf) } -func getInterfaceIndex(logf logger.Logf, netMon *netmon.Monitor, address string) (int, error) { - // Helper so we can log errors. - defaultIdx := func() (int, error) { - if netMon == nil { - idx, err := interfaces.DefaultRouteInterfaceIndex() - if err != nil { - // It's somewhat common for there to be no default gateway route - // (e.g. on a phone with no connectivity), don't log those errors - // since they are expected. - if !errors.Is(err, interfaces.ErrNoGatewayIndexFound) { - logf("[unexpected] netns: DefaultRouteInterfaceIndex: %v", err) - } - return -1, err - } - return idx, nil +// parseAddrForRouting returns the IP address for the given address, or an invalid +// address if the address is not specified. +func parseAddrForRouting(address string) (netip.Addr, error) { + host, _, err := net.SplitHostPort(address) + if err != nil { + return netip.Addr{}, fmt.Errorf("invalid address %q: %w", address, err) + } + if host == "" { + // netip.ParseAddr("") will fail + return netip.Addr{}, nil + } + + addr, err := netip.ParseAddr(host) + if err != nil { + return netip.Addr{}, fmt.Errorf("invalid address %q: %w", address, err) + } + if addr.Zone() != "" { + // We're not supporting addresses with zones right now. + return netip.Addr{}, fmt.Errorf("invalid address %q, has zone: %q", address, addr.Zone()) + } + + return addr, nil +} + +func shouldBindToDefaultInterface(logf logger.Logf, _ *netmon.Monitor, address string) bool { + if isLocalhost(address) { + // Don't bind to an interface for localhost connections. + return false + } + + if coderSoftIsolation.Load() { + addr, err := parseAddrForRouting(address) + if err != nil { + logf("[unexpected] netns: Coder soft isolation: error parsing address %q, binding to default: %v", address, err) + return true + } + if !addr.IsValid() || addr.IsUnspecified() { + // Unspecified addresses should not be bound to any interface. + return false + } + + // Shortcut if the addr is a Coder IP, which we should always bind to + // the default interface. + if tsaddr.IsCoderIP(addr) { + return true } - state := netMon.InterfaceState() - if state == nil { - return -1, errInterfaceStateInvalid + + // Ask Darwin routing table to find the best interface for this address + // by using cached route lookups to avoid spamming the AF_ROUTE socket. + idx, err := getBestInterfaceCached(addr) + if err != nil { + logf("[unexpected] netns: Coder soft isolation: error getting best interface, binding to default: %v", err) + return true } - if iface, ok := state.Interface[state.DefaultRouteInterface]; ok { - return iface.Index, nil + if isInterfaceCoderInterface(idx) { + logf("[unexpected] netns: Coder soft isolation: detected socket destined for Coder interface, binding to default") + return true } + + // It doesn't look like our own interface, so we don't need to bind the + // socket to the default interface. + return false + } + + // The default isolation behavior is to always bind to the default + // interface. + return true +} + +func getDefaultInterfaceIndex(logf logger.Logf, netMon *netmon.Monitor) (int, error) { + if netMon == nil { + idx, err := interfaces.DefaultRouteInterfaceIndex() + if err != nil { + // It's somewhat common for there to be no default gateway route + // (e.g. on a phone with no connectivity), don't log those errors + // since they are expected. + if !errors.Is(err, interfaces.ErrNoGatewayIndexFound) { + logf("[unexpected] netns: DefaultRouteInterfaceIndex: %v", err) + } + return -1, err + } + return idx, nil + } + + state := netMon.InterfaceState() + if state == nil { return -1, errInterfaceStateInvalid } + if iface, ok := state.Interface[state.DefaultRouteInterface]; ok { + return iface.Index, nil + } + return -1, errInterfaceStateInvalid +} + +func getInterfaceIndex(logf logger.Logf, netMon *netmon.Monitor, address string) (int, error) { useRoute := bindToInterfaceByRoute.Load() || bindToInterfaceByRouteEnv() if !useRoute { - return defaultIdx() + return getDefaultInterfaceIndex(logf, netMon) } host, _, err := net.SplitHostPort(address) @@ -99,13 +196,13 @@ func getInterfaceIndex(logf logger.Logf, netMon *netmon.Monitor, address string) addr, err := netip.ParseAddr(host) if err != nil { logf("[unexpected] netns: error parsing address %q: %v", host, err) - return defaultIdx() + return getDefaultInterfaceIndex(logf, netMon) } idx, err := interfaceIndexFor(addr, true /* canRecurse */) if err != nil { logf("netns: error in interfaceIndexFor: %v", err) - return defaultIdx() + return getDefaultInterfaceIndex(logf, netMon) } // Verify that we didn't just choose the Coder interface; @@ -113,7 +210,7 @@ func getInterfaceIndex(logf logger.Logf, netMon *netmon.Monitor, address string) _, tsif, err2 := interfaces.Coder() if err2 == nil && tsif != nil && tsif.Index == idx { logf("[unexpected] netns: interfaceIndexFor returned Coder interface") - return defaultIdx() + return getDefaultInterfaceIndex(logf, netMon) } return idx, err @@ -225,6 +322,31 @@ func interfaceIndexFor(addr netip.Addr, canRecurse bool) (int, error) { return 0, fmt.Errorf("no valid address found") } +// getBestInterfaceCached returns the interface index that we should bind to in +// order to send traffic to the provided address, using a cache to avoid +// spamming the AF_ROUTE socket. +func getBestInterfaceCached(addr netip.Addr) (int, error) { + cache := getRouteCache() + key := addr.String() + + // Check cache first + if idx, ok := cache.Get(key); ok { + return idx, nil + } + + // Cache miss, do the actual lookup + idx, err := interfaceIndexFor(addr, true /* canRecurse */) + if err != nil { + // Don't cache errors + return idx, err + } + + // Cache only successful results + cache.Add(key, idx) + + return idx, nil +} + // SetListenConfigInterfaceIndex sets lc.Control such that sockets are bound // to the provided interface index. func SetListenConfigInterfaceIndex(lc *net.ListenConfig, ifIndex int) error { diff --git a/net/netns/netns_darwin_test.go b/net/netns/netns_darwin_test.go index e11b3cba6bb31..ea51bf242d37e 100644 --- a/net/netns/netns_darwin_test.go +++ b/net/netns/netns_darwin_test.go @@ -4,11 +4,94 @@ package netns import ( + "strconv" "testing" "tailscale.com/net/interfaces" ) +func TestShouldBindToDefaultInterface(t *testing.T) { + t.Run("Normal", func(t *testing.T) { + tests := []struct { + address string + want bool + }{ + {"127.0.0.1:0", false}, + {"127.0.0.1:1234", false}, + {"1.2.3.4:0", true}, + {"1.2.3.4:1234", true}, + } + + for _, test := range tests { + t.Run(test.address, func(t *testing.T) { + got := shouldBindToDefaultInterface(t.Logf, nil, test.address) + if got != test.want { + t.Errorf("want %v, got %v", test.want, got) + } + }) + } + }) + + t.Run("CoderSoftIsolation", func(t *testing.T) { + SetCoderSoftIsolation(true) + t.Cleanup(func() { + SetCoderSoftIsolation(false) + }) + + tests := []struct { + address string + isCoderInterface bool + want bool + }{ + // isCoderInterface shouldn't even matter for localhost since it has + // a special exemption. + {"127.0.0.1:0", false, false}, + {"127.0.0.1:0", true, false}, + {"127.0.0.1:1234", false, false}, + {"127.0.0.1:1234", true, false}, + + {"1.2.3.4:0", false, false}, + {"1.2.3.4:0", true, true}, + {"1.2.3.4:1234", false, false}, + {"1.2.3.4:1234", true, true}, + + // Unspecified addresses should not be bound to any interface. + {":0", false, false}, + {":0", true, false}, + {":1234", false, false}, + {":1234", true, false}, + {"0.0.0.0:1234", false, false}, + {"0.0.0.0:1234", true, false}, + {"[::]:1234", false, false}, + {"[::]:1234", true, false}, + + // Special cases should always bind to default: + {"[::%eth0]:1234", false, true}, // zones are not supported + {"1.2.3.4:", false, true}, // port is empty + {"1.2.3.4:a", false, true}, // port is not a number + {"1.2.3.4:-1", false, true}, // port is negative + {"1.2.3.4:65536", false, true}, // port is too large + } + + for _, test := range tests { + name := test.address + " (isCoderInterface=" + strconv.FormatBool(test.isCoderInterface) + ")" + t.Run(name, func(t *testing.T) { + isInterfaceCoderInterface = func(_ int) bool { + return test.isCoderInterface + } + defer func() { + isInterfaceCoderInterface = isInterfaceCoderInterfaceDefault + }() + + got := shouldBindToDefaultInterface(t.Logf, nil, test.address) + if got != test.want { + t.Errorf("want %v, got %v", test.want, got) + } + }) + } + }) +} + func TestGetInterfaceIndex(t *testing.T) { oldVal := bindToInterfaceByRoute.Load() t.Cleanup(func() { bindToInterfaceByRoute.Store(oldVal) }) diff --git a/net/netns/netns_default.go b/net/netns/netns_default.go index 94f24d8fa4e19..94097a4062605 100644 --- a/net/netns/netns_default.go +++ b/net/netns/netns_default.go @@ -20,3 +20,5 @@ func control(logger.Logf, *netmon.Monitor) func(network, address string, c sysca func controlC(network, address string, c syscall.RawConn) error { return nil } + +func ClearRouteCache() {} diff --git a/net/netns/netns_linux.go b/net/netns/netns_linux.go index bac14e9d778cf..3d4e51530f0d3 100644 --- a/net/netns/netns_linux.go +++ b/net/netns/netns_linux.go @@ -20,6 +20,10 @@ import ( "tailscale.com/util/linuxfw" ) +func ClearRouteCache() { + // There's no route cache to clear on Linux +} + // socketMarkWorksOnce is the sync.Once & cached value for useSocketMark. var socketMarkWorksOnce struct { sync.Once diff --git a/net/netns/netns_windows.go b/net/netns/netns_windows.go index dd733542fb56f..ca0290852a963 100644 --- a/net/netns/netns_windows.go +++ b/net/netns/netns_windows.go @@ -48,6 +48,10 @@ func control(logf logger.Logf, netMon *netmon.Monitor) func(network, address str } } +func ClearRouteCache() { + // There's no route cache to clear on Windows +} + // controlC binds c to the Windows interface that holds a default // route, and is not the Tailscale WinTun interface. func controlLogf(logf logger.Logf, _ *netmon.Monitor, network, address string, c syscall.RawConn) error { @@ -206,7 +210,7 @@ func getSockAddr(address string) (windows.Sockaddr, error) { if addr.Zone() != "" { // Addresses with zones *can* be represented as a Sockaddr with extra // effort, but we don't use or support them currently. - return nil, fmt.Errorf("invalid address %q, has zone: %w", address, err) + return nil, fmt.Errorf("invalid address %q, has zone: %q", address, addr.Zone()) } if addr.IsUnspecified() { // This covers the cases of 0.0.0.0 and [::]. diff --git a/wgengine/userspace.go b/wgengine/userspace.go index 64f948c9ba89e..cc4bd6240b509 100644 --- a/wgengine/userspace.go +++ b/wgengine/userspace.go @@ -28,6 +28,7 @@ import ( "tailscale.com/net/flowtrack" "tailscale.com/net/interfaces" "tailscale.com/net/netmon" + "tailscale.com/net/netns" "tailscale.com/net/packet" "tailscale.com/net/sockstats" "tailscale.com/net/tsaddr" @@ -1137,6 +1138,7 @@ func (e *userspaceEngine) linkChange(changed bool, cur *interfaces.State) { if err := e.dns.FlushCaches(); err != nil { e.logf("wgengine: dns flush failed after major link change: %v", err) } + netns.ClearRouteCache() } // Hacky workaround for Linux DNS issue 2458: on