diff --git a/common/docs/containers.conf.5.md b/common/docs/containers.conf.5.md index 5a11c647ad..cd428573d7 100644 --- a/common/docs/containers.conf.5.md +++ b/common/docs/containers.conf.5.md @@ -488,6 +488,15 @@ default_subnet_pools = [ Configure which rootless network program to use by default. Valid options are `slirp4netns` and `pasta` (default). +**rootless_port_forwarder**="rootlessport" + +Select the port forwarding mechanism for rootless bridge networks. +Valid options are `rootlessport` (default) and `pasta`. +`rootlessport` uses a userspace TCP/UDP proxy. +`pasta` uses pasta's control socket to add port forwarding rules via kernel splice, +which preserves the original source IP address inside the container. +The `pasta` option is **experimental** and subject to change. + **network_config_dir**="/etc/containers/networks" Path to the directory where network configuration files are located. diff --git a/common/libnetwork/internal/rootlessnetns/netns_linux.go b/common/libnetwork/internal/rootlessnetns/netns_linux.go index 687b421029..c11dc07d7c 100644 --- a/common/libnetwork/internal/rootlessnetns/netns_linux.go +++ b/common/libnetwork/internal/rootlessnetns/netns_linux.go @@ -40,6 +40,11 @@ const ( // rootlessNetNsConnPidFile is the name of the rootless netns slirp4netns/pasta pid file. rootlessNetNsConnPidFile = "rootless-netns-conn.pid" + // pestoSocketFile is the name of the UNIX domain socket file used by + // pesto to communicate with the running pasta instance. Pasta is started + // with "-c <socket path>" to enable this control channel. 
+ pestoSocketFile = "pasta.sock" + tmpfs = "tmpfs" none = "none" resolvConfName = "resolv.conf" @@ -198,10 +203,18 @@ func (n *Netns) cleanup() error { func (n *Netns) setupPasta(nsPath string) error { pidPath := n.getPath(rootlessNetNsConnPidFile) + extraOpts := []string{"--pid", pidPath} + + var socketPath string + if n.config.Network.RootlessPortForwarder == config.RootlessPortForwarderPasta { + socketPath = n.getPath(pestoSocketFile) + extraOpts = append(extraOpts, "-c", socketPath) + } + pastaOpts := pasta.SetupOptions{ Config: n.config, Netns: nsPath, - ExtraOptions: []string{"--pid", pidPath}, + ExtraOptions: extraOpts, } res, err := pasta.Setup(&pastaOpts) if err != nil { @@ -235,9 +248,10 @@ func (n *Netns) setupPasta(nsPath string) error { } n.info = &types.RootlessNetnsInfo{ - IPAddresses: res.IPAddresses, - DnsForwardIps: res.DNSForwardIPs, - MapGuestIps: res.MapGuestAddrIPs, + IPAddresses: res.IPAddresses, + DnsForwardIps: res.DNSForwardIPs, + MapGuestIps: res.MapGuestAddrIPs, + PestoSocketPath: socketPath, } if err := n.serializeInfo(); err != nil { return wrapError("serialize info", err) diff --git a/common/libnetwork/netavark/network.go b/common/libnetwork/netavark/network.go index e1021309a2..c06d0486f2 100644 --- a/common/libnetwork/netavark/network.go +++ b/common/libnetwork/netavark/network.go @@ -71,6 +71,12 @@ type netavarkNetwork struct { // rootlessNetns is used for the rootless network setup/teardown rootlessNetns *rootlessnetns.Netns + + // rootlessPortForwarder is the value of config.RootlessPortForwarder from + // containers.conf. When set to config.RootlessPortForwarderPasta, HostIP + // is stripped from port mappings before passing to netavark because pasta's + // splice changes the destination IP. 
+ rootlessPortForwarder string } type InitConfig struct { @@ -145,21 +151,22 @@ func NewNetworkInterface(conf *InitConfig) (types.ContainerNetwork, error) { } n := &netavarkNetwork{ - networkConfigDir: conf.NetworkConfigDir, - networkRunDir: conf.NetworkRunDir, - netavarkBinary: conf.NetavarkBinary, - aardvarkBinary: conf.AardvarkBinary, - networkRootless: useRootlessNetns, - ipamDBPath: filepath.Join(conf.NetworkRunDir, "ipam.db"), - firewallDriver: conf.Config.Network.FirewallDriver, - defaultNetwork: defaultNetworkName, - defaultSubnet: defaultNet, - defaultsubnetPools: defaultSubnetPools, - dnsBindPort: conf.Config.Network.DNSBindPort, - pluginDirs: conf.Config.Network.NetavarkPluginDirs.Get(), - lock: lock, - syslog: conf.Syslog, - rootlessNetns: netns, + networkConfigDir: conf.NetworkConfigDir, + networkRunDir: conf.NetworkRunDir, + netavarkBinary: conf.NetavarkBinary, + aardvarkBinary: conf.AardvarkBinary, + networkRootless: useRootlessNetns, + ipamDBPath: filepath.Join(conf.NetworkRunDir, "ipam.db"), + firewallDriver: conf.Config.Network.FirewallDriver, + defaultNetwork: defaultNetworkName, + defaultSubnet: defaultNet, + defaultsubnetPools: defaultSubnetPools, + dnsBindPort: conf.Config.Network.DNSBindPort, + pluginDirs: conf.Config.Network.NetavarkPluginDirs.Get(), + lock: lock, + syslog: conf.Syslog, + rootlessNetns: netns, + rootlessPortForwarder: conf.Config.Network.RootlessPortForwarder, } return n, nil diff --git a/common/libnetwork/netavark/run.go b/common/libnetwork/netavark/run.go index b172c2e654..0a385edc5d 100644 --- a/common/libnetwork/netavark/run.go +++ b/common/libnetwork/netavark/run.go @@ -12,6 +12,7 @@ import ( "github.com/sirupsen/logrus" "go.podman.io/common/libnetwork/internal/util" "go.podman.io/common/libnetwork/types" + "go.podman.io/common/pkg/config" ) type netavarkOptions struct { @@ -162,6 +163,18 @@ func (n *netavarkNetwork) getCommonNetavarkOptions(needPlugin bool) []string { } func (n *netavarkNetwork) convertNetOpts(opts 
types.NetworkOptions) (*netavarkOptions, bool, error) { + // In pasta mode, strip HostIP from port mappings. Pasta handles host-side + // address binding; netavark only needs DNAT rules inside the netns without + // "ip daddr" constraints (pasta's splice changes the destination IP). + if n.rootlessPortForwarder == config.RootlessPortForwarderPasta && n.networkRootless && len(opts.PortMappings) > 0 { + stripped := make([]types.PortMapping, len(opts.PortMappings)) + copy(stripped, opts.PortMappings) + for i := range stripped { + stripped[i].HostIP = "" + } + opts.PortMappings = stripped + } + netavarkOptions := netavarkOptions{ NetworkOptions: opts, Networks: make(map[string]*types.Network, len(opts.Networks)), diff --git a/common/libnetwork/pasta/pesto_linux.go b/common/libnetwork/pasta/pesto_linux.go new file mode 100644 index 0000000000..bd8f4b6639 --- /dev/null +++ b/common/libnetwork/pasta/pesto_linux.go @@ -0,0 +1,127 @@ +// Pesto client for dynamic port forwarding on a running pasta instance. +// +// Pesto updates pasta's forwarding table via a UNIX domain socket (-c). +// Used by rootless bridge networking: pesto incrementally adds or deletes +// port forwarding rules for individual containers. +// +// Passt only forwards traffic from the host into the rootless netns. +// Netavark handles the final DNAT to the container IP:ContainerPort +// inside the netns. Each mapping uses HostPort as both source and +// destination so traffic arrives at the port netavark expects. +// +// When no HostIP is specified, pesto binds both IPv4 (0.0.0.0) and +// IPv6 ([::]) so dual-stack networks work out of the box. 
+// +// Limitations: +// - TCP and UDP only (other protocols such as SCTP return an error) + +package pasta + +import ( + "errors" + "fmt" + "os/exec" + "strings" + + "github.com/sirupsen/logrus" + "go.podman.io/common/libnetwork/types" + "go.podman.io/common/pkg/config" +) + +const PestoBinaryName = "pesto" + +// PestoAddPorts adds port forwarding rules to the running pasta instance +// via -A/--add. Idempotent: adding already-active ports is a no-op. +func PestoAddPorts(conf *config.Config, socketPath string, ports []types.PortMapping) error { + if socketPath == "" { + return errors.New("pesto control socket not available") + } + logrus.Debugf("pesto: adding %d port mappings", len(ports)) + return pestoModifyPorts(conf, socketPath, ports, "--add") +} + +// PestoDeletePorts removes port forwarding rules from the running pasta +// instance via -D/--delete. +func PestoDeletePorts(conf *config.Config, socketPath string, ports []types.PortMapping) error { + if socketPath == "" { + return nil + } + logrus.Debugf("pesto: deleting %d port mappings", len(ports)) + return pestoModifyPorts(conf, socketPath, ports, "--delete") +} + +func pestoModifyPorts(conf *config.Config, socketPath string, ports []types.PortMapping, mode string) error { + pestoPath, err := conf.FindHelperBinary(PestoBinaryName, true) + if err != nil { + return fmt.Errorf("could not find pesto binary: %w", err) + } + + pestoArgs, err := portMappingsToPestoArgs(ports) + if err != nil { + return err + } + args := make([]string, 0, len(pestoArgs)+2) // +2 for mode and socket path + args = append(args, mode) + args = append(args, pestoArgs...) 
+ args = append(args, socketPath) + + logrus.Debugf("pesto arguments: %s", strings.Join(args, " ")) + + out, err := exec.Command(pestoPath, args...).CombinedOutput() + if err != nil { + return fmt.Errorf("pesto failed: %w\noutput: %s", err, string(out)) + } + if len(out) > 0 { + logrus.Debugf("pesto output: %s", strings.TrimSpace(string(out))) + } + return nil +} + +// portMappingsToPestoArgs converts PortMappings into pesto CLI arguments. +// +// When HostIP is set, a single binding is created (e.g. "-t 127.0.0.1/8080"). +// When HostIP is empty, both IPv4 and IPv6 bindings are created so that +// dual-stack networks work: "-t 0.0.0.0/8080 -t [::]/8080". +func portMappingsToPestoArgs(ports []types.PortMapping) ([]string, error) { + var args []string + + for _, p := range ports { + var addrs []string + switch { + case p.HostIP == "": + addrs = []string{"0.0.0.0/", "[::]/"} + case strings.Contains(p.HostIP, ":"): + addrs = []string{"[" + p.HostIP + "]/"} + default: + addrs = []string{p.HostIP + "/"} + } + + for protocol := range strings.SplitSeq(p.Protocol, ",") { + var flag string + switch protocol { + case "tcp": + flag = "-t" + case "udp": + flag = "-u" + default: + return nil, fmt.Errorf("pesto: unsupported protocol %s", protocol) + } + + portRange := p.Range + if portRange == 0 { + portRange = 1 + } + + for _, addr := range addrs { + var arg string + if portRange == 1 { + arg = fmt.Sprintf("%s%d", addr, p.HostPort) + } else { + arg = fmt.Sprintf("%s%d-%d", addr, p.HostPort, p.HostPort+portRange-1) + } + args = append(args, flag, arg) + } + } + } + return args, nil +} diff --git a/common/libnetwork/pasta/pesto_linux_test.go b/common/libnetwork/pasta/pesto_linux_test.go new file mode 100644 index 0000000000..45700ded10 --- /dev/null +++ b/common/libnetwork/pasta/pesto_linux_test.go @@ -0,0 +1,205 @@ +package pasta + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "go.podman.io/common/libnetwork/types" +) + +func Test_portMappingsToPestoArgs(t 
*testing.T) { + tests := []struct { + name string + ports []types.PortMapping + want []string + wantErr string + }{ + { + name: "no ports returns nil", + ports: nil, + want: nil, + }, + { + name: "empty slice same as nil", + ports: []types.PortMapping{}, + want: nil, + }, + { + name: "single tcp port dual-stack", + ports: []types.PortMapping{ + {HostPort: 8080, ContainerPort: 80, Protocol: "tcp", Range: 1}, + }, + want: []string{"-t", "0.0.0.0/8080", "-t", "[::]/8080"}, + }, + { + name: "single udp port dual-stack", + ports: []types.PortMapping{ + {HostPort: 53, ContainerPort: 53, Protocol: "udp", Range: 1}, + }, + want: []string{"-u", "0.0.0.0/53", "-u", "[::]/53"}, + }, + { + name: "tcp and udp port dual-stack", + ports: []types.PortMapping{ + {HostPort: 80, ContainerPort: 80, Protocol: "tcp", Range: 1}, + {HostPort: 53, ContainerPort: 53, Protocol: "udp", Range: 1}, + }, + want: []string{"-t", "0.0.0.0/80", "-t", "[::]/80", "-u", "0.0.0.0/53", "-u", "[::]/53"}, + }, + { + name: "dual protocol on single mapping", + ports: []types.PortMapping{ + {HostPort: 80, ContainerPort: 80, Protocol: "tcp,udp", Range: 1}, + }, + want: []string{"-t", "0.0.0.0/80", "-t", "[::]/80", "-u", "0.0.0.0/80", "-u", "[::]/80"}, + }, + { + name: "port range expands to host port range", + ports: []types.PortMapping{ + {HostPort: 8000, ContainerPort: 80, Protocol: "tcp", Range: 5}, + }, + want: []string{"-t", "0.0.0.0/8000-8004", "-t", "[::]/8000-8004"}, + }, + { + name: "range of zero treated as single port", + ports: []types.PortMapping{ + {HostPort: 80, ContainerPort: 80, Protocol: "tcp", Range: 0}, + }, + want: []string{"-t", "0.0.0.0/80", "-t", "[::]/80"}, + }, + { + name: "range of two", + ports: []types.PortMapping{ + {HostPort: 3000, ContainerPort: 3000, Protocol: "tcp", Range: 2}, + }, + want: []string{"-t", "0.0.0.0/3000-3001", "-t", "[::]/3000-3001"}, + }, + { + name: "explicit IPv4 host IP", + ports: []types.PortMapping{ + {HostIP: "127.0.0.1", HostPort: 443, ContainerPort: 
443, Protocol: "tcp", Range: 1}, + }, + want: []string{"-t", "127.0.0.1/443"}, + }, + { + name: "IPv6 host IP gets brackets", + ports: []types.PortMapping{ + {HostIP: "::1", HostPort: 8080, ContainerPort: 80, Protocol: "tcp", Range: 1}, + }, + want: []string{"-t", "[::1]/8080"}, + }, + { + name: "full-form IPv6 host IP", + ports: []types.PortMapping{ + {HostIP: "fd00::1", HostPort: 80, ContainerPort: 80, Protocol: "udp", Range: 1}, + }, + want: []string{"-u", "[fd00::1]/80"}, + }, + { + name: "multiple tcp ports dual-stack", + ports: []types.PortMapping{ + {HostPort: 80, ContainerPort: 80, Protocol: "tcp", Range: 1}, + {HostPort: 443, ContainerPort: 443, Protocol: "tcp", Range: 1}, + }, + want: []string{"-t", "0.0.0.0/80", "-t", "[::]/80", "-t", "0.0.0.0/443", "-t", "[::]/443"}, + }, + { + name: "unsupported protocol returns error", + ports: []types.PortMapping{ + {HostPort: 80, ContainerPort: 80, Protocol: "sctp", Range: 1}, + }, + wantErr: "pesto: unsupported protocol sctp", + }, + { + name: "unsupported protocol mixed with valid returns error", + ports: []types.PortMapping{ + {HostPort: 80, ContainerPort: 80, Protocol: "tcp", Range: 1}, + {HostPort: 90, ContainerPort: 90, Protocol: "sctp", Range: 1}, + }, + wantErr: "pesto: unsupported protocol sctp", + }, + { + name: "explicit host IP on udp", + ports: []types.PortMapping{ + {HostIP: "10.0.0.1", HostPort: 3000, ContainerPort: 3000, Protocol: "udp", Range: 1}, + }, + want: []string{"-u", "10.0.0.1/3000"}, + }, + { + name: "container port does not appear in args", + ports: []types.PortMapping{ + {HostPort: 9090, ContainerPort: 3000, Protocol: "tcp", Range: 1}, + }, + want: []string{"-t", "0.0.0.0/9090", "-t", "[::]/9090"}, + }, + { + name: "host IP with range", + ports: []types.PortMapping{ + {HostIP: "10.0.0.1", HostPort: 3000, ContainerPort: 3000, Protocol: "udp", Range: 3}, + }, + want: []string{"-u", "10.0.0.1/3000-3002"}, + }, + { + name: "range with dual protocol", + ports: []types.PortMapping{ + {HostPort: 
5000, ContainerPort: 5000, Protocol: "tcp,udp", Range: 3}, + }, + want: []string{"-t", "0.0.0.0/5000-5002", "-t", "[::]/5000-5002", "-u", "0.0.0.0/5000-5002", "-u", "[::]/5000-5002"}, + }, + { + name: "IPv6 host IP with range", + ports: []types.PortMapping{ + {HostIP: "::1", HostPort: 5000, ContainerPort: 5000, Protocol: "tcp", Range: 4}, + }, + want: []string{"-t", "[::1]/5000-5003"}, + }, + { + name: "mixed explicit and default host IPs", + ports: []types.PortMapping{ + {HostIP: "10.0.0.1", HostPort: 80, ContainerPort: 80, Protocol: "tcp", Range: 1}, + {HostPort: 443, ContainerPort: 443, Protocol: "tcp", Range: 1}, + }, + want: []string{"-t", "10.0.0.1/80", "-t", "0.0.0.0/443", "-t", "[::]/443"}, + }, + { + name: "triple protocol with unsupported in middle returns error", + ports: []types.PortMapping{ + {HostPort: 80, ContainerPort: 80, Protocol: "tcp,sctp,udp", Range: 1}, + }, + wantErr: "pesto: unsupported protocol sctp", + }, + { + name: "dual protocol with explicit IPv4", + ports: []types.PortMapping{ + {HostIP: "192.168.1.1", HostPort: 80, ContainerPort: 80, Protocol: "tcp,udp", Range: 1}, + }, + want: []string{"-t", "192.168.1.1/80", "-u", "192.168.1.1/80"}, + }, + { + name: "dual protocol with explicit IPv6", + ports: []types.PortMapping{ + {HostIP: "fd00::1", HostPort: 80, ContainerPort: 80, Protocol: "tcp,udp", Range: 1}, + }, + want: []string{"-t", "[fd00::1]/80", "-u", "[fd00::1]/80"}, + }, + { + name: "all unsupported protocols returns error", + ports: []types.PortMapping{ + {HostPort: 80, ContainerPort: 80, Protocol: "sctp,dccp", Range: 1}, + }, + wantErr: "pesto: unsupported protocol sctp", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := portMappingsToPestoArgs(tt.ports) + if tt.wantErr != "" { + assert.EqualError(t, err, tt.wantErr) + return + } + assert.NoError(t, err) + assert.Equal(t, tt.want, got) + }) + } +} diff --git a/common/libnetwork/types/network.go b/common/libnetwork/types/network.go index 
11ba341355..85159bad2b 100644 --- a/common/libnetwork/types/network.go +++ b/common/libnetwork/types/network.go @@ -377,6 +377,9 @@ type RootlessNetnsInfo struct { DnsForwardIps []string // MapGuestIps should be used for the host.containers.internal entry when set MapGuestIps []string + // PestoSocketPath is the path to the pasta control socket for dynamic + // port forwarding via pesto. Empty when pasta was started without -c. + PestoSocketPath string } // FilterFunc can be passed to NetworkList to filter the networks. diff --git a/common/pkg/config/config.go b/common/pkg/config/config.go index eb21a3895a..e53dcce63b 100644 --- a/common/pkg/config/config.go +++ b/common/pkg/config/config.go @@ -630,8 +630,19 @@ type NetworkConfig struct { // If multiple IPs are specified, separate port mapping for each of the specified // IP would be created. DefaultHostIPs configfile.Slice `toml:"default_host_ips,omitempty"` + + // RootlessPortForwarder selects the port forwarding mechanism for rootless + // bridge networks. Valid values are RootlessPortForwarderRootlessport + // (default, userspace TCP/UDP proxy) and RootlessPortForwarderPasta + // (experimental, pasta's kernel splice preserving the original source IP). + RootlessPortForwarder string `toml:"rootless_port_forwarder,omitempty"` } +const ( + RootlessPortForwarderRootlessport = "rootlessport" + RootlessPortForwarderPasta = "pasta" +) + type SubnetPool struct { // Base is a bigger subnet which will be used to allocate a subnet with // the given size. diff --git a/common/pkg/config/containers.conf b/common/pkg/config/containers.conf index 9a6f38bfd2..7b0b8919cd 100644 --- a/common/pkg/config/containers.conf +++ b/common/pkg/config/containers.conf @@ -410,6 +410,14 @@ default_sysctls = [ # #default_rootless_network_cmd = "pasta" +# Select the port forwarding mechanism for rootless bridge networks. +# "rootlessport" (default) uses a userspace TCP/UDP proxy. 
+# "pasta" (experimental) uses pasta's control socket to add port forwarding rules +# via kernel splice, which preserves the original source IP address inside the +# container. This option is experimental and subject to change. +# +#rootless_port_forwarder = "rootlessport" + # Path to the directory where network configuration files are located. # The default is "/etc/containers/networks" as root # and "$graphroot/networks" as rootless. diff --git a/common/pkg/config/default.go b/common/pkg/config/default.go index fddbe281f6..49639e6991 100644 --- a/common/pkg/config/default.go +++ b/common/pkg/config/default.go @@ -249,6 +249,7 @@ func defaultConfig() (*Config, error) { DefaultRootlessNetworkCmd: "pasta", DNSBindPort: 0, NetavarkPluginDirs: configfile.NewSlice(DefaultNetavarkPluginDirs), + RootlessPortForwarder: RootlessPortForwarderRootlessport, }, Engine: *defaultEngineConfig, Secrets: defaultSecretConfig(),