Skip to content

Commit 4cf309b

Browse files
authored
Merge pull request #565 from AkihiroSuda/preserve-src-ip
Preserve real client source IP in builtin port driver via IP_TRANSPARENT
2 parents 10e0807 + b7d24a0 commit 4cf309b

File tree

11 files changed

+411
-36
lines changed

11 files changed

+411
-36
lines changed

.github/workflows/main.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,10 @@ jobs:
169169
run: |
170170
docker run --rm --security-opt seccomp=unconfined --security-opt apparmor=unconfined --device /dev/net/tun \
171171
rootlesskit:test-integration ./benchmark-iperf3-port.sh builtin --detach-netns
172+
- name: "Benchmark: TCP Ports (network driver=slirp4netns, port driver=builtin) with source-ip-transparent=false"
173+
run: |
174+
docker run --rm --security-opt seccomp=unconfined --security-opt apparmor=unconfined --device /dev/net/tun \
175+
rootlesskit:test-integration ./benchmark-iperf3-port.sh builtin --source-ip-transparent=false
172176
- name: "Benchmark: TCP Ports (network driver=pasta, port driver=implicit)"
173177
run: |
174178
docker run --rm --security-opt seccomp=unconfined --security-opt apparmor=unconfined --device /dev/net/tun \

cmd/rootlesskit/main.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,11 @@ See https://rootlesscontaine.rs/getting-started/common/ .
204204
Aliases: []string{"p"},
205205
Usage: "publish ports. e.g. \"127.0.0.1:8080:80/tcp\"",
206206
}, CategoryPort),
207+
Categorize(&cli.BoolFlag{
208+
Name: "source-ip-transparent",
209+
Usage: "preserve real client source IP using IP_TRANSPARENT (builtin port driver)",
210+
Value: true,
211+
}, CategoryPort),
207212
Categorize(&cli.BoolFlag{
208213
Name: "pidns",
209214
Usage: "create a PID namespace",
@@ -620,7 +625,7 @@ func createParentOpt(clicontext *cli.Context) (parent.Opt, error) {
620625
if opt.NetworkDriver == nil {
621626
return opt, errors.New("port driver requires non-host network")
622627
}
623-
opt.PortDriver, err = builtin.NewParentDriver(&logrusDebugWriter{label: "port/builtin"}, opt.StateDir)
628+
opt.PortDriver, err = builtin.NewParentDriver(&logrusDebugWriter{label: "port/builtin"}, opt.StateDir, clicontext.Bool("source-ip-transparent"))
624629
if err != nil {
625630
return opt, err
626631
}

pkg/port/builtin/builtin.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ import (
99
)
1010

1111
var (
12-
NewParentDriver func(logWriter io.Writer, stateDir string) (port.ParentDriver, error) = parent.NewDriver
13-
NewChildDriver func(logWriter io.Writer) port.ChildDriver = child.NewDriver
12+
NewParentDriver func(logWriter io.Writer, stateDir string, sourceIPTransparent bool) (port.ParentDriver, error) = parent.NewDriver
13+
NewChildDriver func(logWriter io.Writer) port.ChildDriver = child.NewDriver
1414
)
1515

1616
// Available indicates whether this port driver is compiled in (used for generating help text)

pkg/port/builtin/builtin_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,13 @@ func TestBuiltIn(t *testing.T) {
2121
t.Fatal(err)
2222
}
2323
defer os.RemoveAll(tmpDir)
24-
d, err := NewParentDriver(os.Stderr, tmpDir)
24+
d, err := NewParentDriver(os.Stderr, tmpDir, true)
2525
if err != nil {
2626
t.Fatal(err)
2727
}
2828
pf := func() port.ParentDriver {
2929
return d
3030
}
3131
testsuite.Run(t, pf)
32+
testsuite.RunTCPTransparent(t, pf)
3233
}

pkg/port/builtin/child/child.go

Lines changed: 112 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,12 @@ import (
66
"io"
77
"net"
88
"os"
9+
"os/exec"
910
"strconv"
1011
"strings"
12+
"sync"
13+
"syscall"
14+
"time"
1115

1216
"golang.org/x/sys/unix"
1317

@@ -25,10 +29,15 @@ func NewDriver(logWriter io.Writer) port.ChildDriver {
2529
}
2630

2731
type childDriver struct {
28-
logWriter io.Writer
32+
logWriter io.Writer
33+
sourceIPTransparent bool
34+
routingSetup sync.Once
35+
routingReady bool
36+
routingWarn sync.Once
2937
}
3038

3139
func (d *childDriver) RunChildDriver(opaque map[string]string, quit <-chan struct{}, detachedNetNSPath string) error {
40+
d.sourceIPTransparent = opaque[opaquepkg.SourceIPTransparent] == "true"
3241
socketPath := opaque[opaquepkg.SocketPath]
3342
if socketPath == "" {
3443
return errors.New("socket path not set")
@@ -119,7 +128,6 @@ func (d *childDriver) handleConnectRequest(c *net.UnixConn, req *msg.Request) er
119128
}
120129
// dialProto does not need "4", "6" suffix
121130
dialProto := strings.TrimSuffix(strings.TrimSuffix(req.Proto, "6"), "4")
122-
var dialer net.Dialer
123131
ip := req.IP
124132
if ip == "" {
125133
ip = "127.0.0.1"
@@ -135,9 +143,31 @@ func (d *childDriver) handleConnectRequest(c *net.UnixConn, req *msg.Request) er
135143
}
136144
ip = p.String()
137145
}
138-
targetConn, err := dialer.Dial(dialProto, net.JoinHostPort(ip, strconv.Itoa(req.Port)))
139-
if err != nil {
140-
return err
146+
targetAddr := net.JoinHostPort(ip, strconv.Itoa(req.Port))
147+
148+
var targetConn net.Conn
149+
var err error
150+
if d.sourceIPTransparent && req.SourceIP != "" && req.SourcePort != 0 && dialProto == "tcp" && !net.ParseIP(req.SourceIP).IsLoopback() {
151+
d.routingSetup.Do(func() { d.routingReady = d.setupTransparentRouting() })
152+
if !d.routingReady {
153+
d.routingWarn.Do(func() {
154+
fmt.Fprintf(d.logWriter, "source IP transparent: falling back to non-transparent mode, client source IPs will not be preserved\n")
155+
})
156+
goto fallback
157+
}
158+
targetConn, err = transparentDial(dialProto, targetAddr, req.SourceIP, req.SourcePort)
159+
if err != nil {
160+
fmt.Fprintf(d.logWriter, "transparent dial failed, falling back: %v\n", err)
161+
targetConn, err = nil, nil
162+
}
163+
}
164+
fallback:
165+
if targetConn == nil {
166+
var dialer net.Dialer
167+
targetConn, err = dialer.Dial(dialProto, targetAddr)
168+
if err != nil {
169+
return err
170+
}
141171
}
142172
defer targetConn.Close() // no effect on duplicated FD
143173
targetConnFiler, ok := targetConn.(filer)
@@ -164,6 +194,83 @@ func (d *childDriver) handleConnectRequest(c *net.UnixConn, req *msg.Request) er
164194
return err
165195
}
166196

197+
// setupTransparentRouting sets up policy routing so that response packets
198+
// destined to transparent-bound source IPs are delivered locally.
199+
//
200+
// Transparent sockets (IP_TRANSPARENT) bind to non-local addresses (the real
201+
// client IP). Response packets to these addresses must be routed locally instead
202+
// of being sent out through the TAP device (slirp4netns).
203+
//
204+
// The transparent SYN goes through OUTPUT (where we tag it with CONNMARK) and
205+
// then either:
206+
//
207+
// 1. Gets DNAT'd to the container (nerdctl/CNI): the SYN-ACK arrives via the
208+
// bridge in PREROUTING, where we restore connmark to fwmark.
209+
//
210+
// 2. Goes through loopback to a userspace proxy like docker-proxy: the SYN
211+
// enters PREROUTING on loopback with connmark, which sets fwmark. With
212+
// tcp_fwmark_accept=1, the accepted socket inherits the fwmark. The proxy's
213+
// SYN-ACK is then routed via the fwmark table (local delivery) instead of
214+
// the default route (TAP), allowing it to reach the transparent socket.
215+
func (d *childDriver) setupTransparentRouting() bool {
216+
// Check that iptables is available before proceeding.
217+
if _, err := exec.LookPath("iptables"); err != nil {
218+
fmt.Fprintf(d.logWriter, "source IP transparent: iptables not found, disabling: %v\n", err)
219+
return false
220+
}
221+
// Verify the connmark module is usable (kernel module might not be loaded).
222+
if out, err := exec.Command("iptables", "-t", "mangle", "-L", "-n").CombinedOutput(); err != nil {
223+
fmt.Fprintf(d.logWriter, "source IP transparent: iptables mangle table not available, disabling: %v: %s\n", err, out)
224+
return false
225+
}
226+
cmds := [][]string{
227+
// Table 100: treat all addresses as local (for delivery to transparent sockets)
228+
{"ip", "route", "add", "local", "default", "dev", "lo", "table", "100"},
229+
{"ip", "-6", "route", "add", "local", "default", "dev", "lo", "table", "100"},
230+
// Route fwmark-100 packets via table 100
231+
{"ip", "rule", "add", "fwmark", "100", "lookup", "100", "priority", "100"},
232+
{"ip", "-6", "rule", "add", "fwmark", "100", "lookup", "100", "priority", "100"},
233+
// Inherit fwmark from SYN to accepted socket (needed for userspace proxies
234+
// like docker-proxy, so that SYN-ACK routing uses table 100)
235+
{"sysctl", "-w", "net.ipv4.tcp_fwmark_accept=1"},
236+
// In OUTPUT: tag transparent connections (non-local source) with CONNMARK
237+
{"iptables", "-t", "mangle", "-A", "OUTPUT", "-p", "tcp", "-m", "addrtype", "!", "--src-type", "LOCAL", "-j", "CONNMARK", "--set-mark", "100"},
238+
{"ip6tables", "-t", "mangle", "-A", "OUTPUT", "-p", "tcp", "-m", "addrtype", "!", "--src-type", "LOCAL", "-j", "CONNMARK", "--set-mark", "100"},
239+
// In PREROUTING: restore connmark to fwmark for routing
240+
{"iptables", "-t", "mangle", "-A", "PREROUTING", "-p", "tcp", "-m", "connmark", "--mark", "100", "-j", "MARK", "--set-mark", "100"},
241+
{"ip6tables", "-t", "mangle", "-A", "PREROUTING", "-p", "tcp", "-m", "connmark", "--mark", "100", "-j", "MARK", "--set-mark", "100"},
242+
}
243+
for _, args := range cmds {
244+
if out, err := exec.Command(args[0], args[1:]...).CombinedOutput(); err != nil {
245+
fmt.Fprintf(d.logWriter, "source IP transparent routing setup: %v: %s\n", err, out)
246+
}
247+
}
248+
return true
249+
}
250+
251+
// transparentDial dials targetAddr using IP_TRANSPARENT, binding to the given
252+
// source IP and port so the backend service sees the real client address.
253+
func transparentDial(dialProto, targetAddr, sourceIP string, sourcePort int) (net.Conn, error) {
254+
dialer := net.Dialer{
255+
Timeout: time.Second,
256+
LocalAddr: &net.TCPAddr{IP: net.ParseIP(sourceIP), Port: sourcePort},
257+
Control: func(network, address string, c syscall.RawConn) error {
258+
var sockErr error
259+
if err := c.Control(func(fd uintptr) {
260+
if strings.Contains(network, "6") {
261+
sockErr = unix.SetsockoptInt(int(fd), unix.SOL_IPV6, unix.IPV6_TRANSPARENT, 1)
262+
} else {
263+
sockErr = unix.SetsockoptInt(int(fd), unix.SOL_IP, unix.IP_TRANSPARENT, 1)
264+
}
265+
}); err != nil {
266+
return err
267+
}
268+
return sockErr
269+
},
270+
}
271+
return dialer.Dial(dialProto, targetAddr)
272+
}
273+
167274
// filer is implemented by *net.TCPConn and *net.UDPConn
168275
type filer interface {
169276
File() (f *os.File, err error)

pkg/port/builtin/msg/msg.go

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ type Request struct {
2525
Port int
2626
ParentIP string
2727
HostGatewayIP string
28+
SourceIP string `json:",omitempty"` // real client IP for IP_TRANSPARENT
29+
SourcePort int `json:",omitempty"` // real client port for IP_TRANSPARENT
2830
}
2931

3032
// Reply may contain FD as OOB
@@ -69,7 +71,9 @@ func hostGatewayIP() string {
6971

7072
// ConnectToChild connects to the child UNIX socket, and obtains TCP or UDP socket FD
7173
// that corresponds to the port spec.
72-
func ConnectToChild(c *net.UnixConn, spec port.Spec) (int, error) {
74+
// sourceAddr is the real client address (e.g., from net.Conn.RemoteAddr()) for IP_TRANSPARENT support.
75+
// Pass nil to skip source IP preservation.
76+
func ConnectToChild(c *net.UnixConn, spec port.Spec, sourceAddr net.Addr) (int, error) {
7377
req := Request{
7478
Type: RequestTypeConnect,
7579
Proto: spec.Proto,
@@ -78,6 +82,10 @@ func ConnectToChild(c *net.UnixConn, spec port.Spec) (int, error) {
7882
ParentIP: spec.ParentIP,
7983
HostGatewayIP: hostGatewayIP(),
8084
}
85+
if tcpAddr, ok := sourceAddr.(*net.TCPAddr); ok && tcpAddr != nil {
86+
req.SourceIP = tcpAddr.IP.String()
87+
req.SourcePort = tcpAddr.Port
88+
}
8189
if _, err := lowlevelmsgutil.MarshalToWriter(c, &req); err != nil {
8290
return 0, err
8391
}
@@ -114,21 +122,21 @@ func ConnectToChild(c *net.UnixConn, spec port.Spec) (int, error) {
114122
}
115123

116124
// ConnectToChildWithSocketPath wraps ConnectToChild
117-
func ConnectToChildWithSocketPath(socketPath string, spec port.Spec) (int, error) {
125+
func ConnectToChildWithSocketPath(socketPath string, spec port.Spec, sourceAddr net.Addr) (int, error) {
118126
var dialer net.Dialer
119127
conn, err := dialer.Dial("unix", socketPath)
120128
if err != nil {
121129
return 0, err
122130
}
123131
defer conn.Close()
124132
c := conn.(*net.UnixConn)
125-
return ConnectToChild(c, spec)
133+
return ConnectToChild(c, spec, sourceAddr)
126134
}
127135

128136
// ConnectToChildWithRetry retries ConnectToChild every (i*5) milliseconds.
129-
func ConnectToChildWithRetry(socketPath string, spec port.Spec, retries int) (int, error) {
137+
func ConnectToChildWithRetry(socketPath string, spec port.Spec, retries int, sourceAddr net.Addr) (int, error) {
130138
for i := 0; i < retries; i++ {
131-
fd, err := ConnectToChildWithSocketPath(socketPath, spec)
139+
fd, err := ConnectToChildWithSocketPath(socketPath, spec, sourceAddr)
132140
if i == retries-1 && err != nil {
133141
return 0, err
134142
}

pkg/port/builtin/opaque/opaque.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package opaque
22

33
const (
4-
SocketPath = "builtin.socketpath"
5-
ChildReadyPipePath = "builtin.readypipepath"
4+
SocketPath = "builtin.socketpath"
5+
ChildReadyPipePath = "builtin.readypipepath"
6+
SourceIPTransparent = "builtin.source-ip-transparent"
67
)

pkg/port/builtin/parent/parent.go

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ import (
2424
)
2525

2626
// NewDriver for builtin driver.
27-
func NewDriver(logWriter io.Writer, stateDir string) (port.ParentDriver, error) {
27+
func NewDriver(logWriter io.Writer, stateDir string, sourceIPTransparent bool) (port.ParentDriver, error) {
2828
// TODO: consider using socketpair FD instead of socket file
2929
socketPath := filepath.Join(stateDir, ".bp.sock")
3030
childReadyPipePath := filepath.Join(stateDir, ".bp-ready.pipe")
@@ -36,24 +36,26 @@ func NewDriver(logWriter io.Writer, stateDir string) (port.ParentDriver, error)
3636
return nil, fmt.Errorf("cannot mkfifo %s: %w", childReadyPipePath, err)
3737
}
3838
d := driver{
39-
logWriter: logWriter,
40-
socketPath: socketPath,
41-
childReadyPipePath: childReadyPipePath,
42-
ports: make(map[int]*port.Status, 0),
43-
stoppers: make(map[int]func(context.Context) error, 0),
44-
nextID: 1,
39+
logWriter: logWriter,
40+
socketPath: socketPath,
41+
childReadyPipePath: childReadyPipePath,
42+
sourceIPTransparent: sourceIPTransparent,
43+
ports: make(map[int]*port.Status, 0),
44+
stoppers: make(map[int]func(context.Context) error, 0),
45+
nextID: 1,
4546
}
4647
return &d, nil
4748
}
4849

4950
type driver struct {
50-
logWriter io.Writer
51-
socketPath string
52-
childReadyPipePath string
53-
mu sync.Mutex
54-
ports map[int]*port.Status
55-
stoppers map[int]func(context.Context) error
56-
nextID int
51+
logWriter io.Writer
52+
socketPath string
53+
childReadyPipePath string
54+
sourceIPTransparent bool
55+
mu sync.Mutex
56+
ports map[int]*port.Status
57+
stoppers map[int]func(context.Context) error
58+
nextID int
5759
}
5860

5961
func (d *driver) Info(ctx context.Context) (*api.PortDriverInfo, error) {
@@ -66,10 +68,14 @@ func (d *driver) Info(ctx context.Context) (*api.PortDriverInfo, error) {
6668
}
6769

6870
func (d *driver) OpaqueForChild() map[string]string {
69-
return map[string]string{
71+
m := map[string]string{
7072
opaque.SocketPath: d.socketPath,
7173
opaque.ChildReadyPipePath: d.childReadyPipePath,
7274
}
75+
if d.sourceIPTransparent {
76+
m[opaque.SourceIPTransparent] = "true"
77+
}
78+
return m
7379
}
7480

7581
func (d *driver) RunParentDriver(initComplete chan struct{}, quit <-chan struct{}, _ *port.ChildContext) error {

pkg/port/builtin/parent/tcp/tcp.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ func Run(socketPath string, spec port.Spec, stopCh <-chan struct{}, stoppedCh ch
5959
func copyConnToChild(c net.Conn, socketPath string, spec port.Spec, stopCh <-chan struct{}) error {
6060
defer c.Close()
6161
// get fd from the child as an SCM_RIGHTS cmsg
62-
fd, err := msg.ConnectToChildWithRetry(socketPath, spec, 10)
62+
fd, err := msg.ConnectToChildWithRetry(socketPath, spec, 10, c.RemoteAddr())
6363
if err != nil {
6464
return err
6565
}

pkg/port/builtin/parent/udp/udp.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ func Run(socketPath string, spec port.Spec, stopCh <-chan struct{}, stoppedCh ch
2626
Listener: c,
2727
BackendDial: func() (*net.UDPConn, error) {
2828
// get fd from the child as an SCM_RIGHTS cmsg
29-
fd, err := msg.ConnectToChildWithRetry(socketPath, spec, 10)
29+
fd, err := msg.ConnectToChildWithRetry(socketPath, spec, 10, nil)
3030
if err != nil {
3131
return nil, err
3232
}

0 commit comments

Comments
 (0)