@@ -6,8 +6,12 @@ import (
66 "io"
77 "net"
88 "os"
9+ "os/exec"
910 "strconv"
1011 "strings"
12+ "sync"
13+ "syscall"
14+ "time"
1115
1216 "golang.org/x/sys/unix"
1317
@@ -25,10 +29,15 @@ func NewDriver(logWriter io.Writer) port.ChildDriver {
2529}
2630
// childDriver carries the state shared by RunChildDriver and
// handleConnectRequest.
//
// Fields:
//   - logWriter: destination for the diagnostic messages written with
//     fmt.Fprintf (fallback warnings, routing setup errors).
//   - sourceIPTransparent: set by RunChildDriver from the opaque map; when
//     true, handleConnectRequest tries a transparent dial for TCP requests
//     that carry a non-loopback source IP and a source port.
//   - routingSetup: makes setupTransparentRouting run at most once.
//   - routingReady: result of that single setupTransparentRouting call; it is
//     written inside routingSetup.Do and read only after Do has returned.
//   - routingWarn: makes the "falling back to non-transparent mode" warning
//     appear at most once.
2731type childDriver struct {
28- logWriter io.Writer
32+ logWriter io.Writer
33+ sourceIPTransparent bool
34+ routingSetup sync.Once
35+ routingReady bool
36+ routingWarn sync.Once
2937}
3038
3139func (d * childDriver ) RunChildDriver (opaque map [string ]string , quit <- chan struct {}, detachedNetNSPath string ) error {
40+ d .sourceIPTransparent = opaque [opaquepkg .SourceIPTransparent ] == "true"
3241 socketPath := opaque [opaquepkg .SocketPath ]
3342 if socketPath == "" {
3443 return errors .New ("socket path not set" )
@@ -119,7 +128,6 @@ func (d *childDriver) handleConnectRequest(c *net.UnixConn, req *msg.Request) er
119128 }
120129 // dialProto does not need "4", "6" suffix
121130 dialProto := strings .TrimSuffix (strings .TrimSuffix (req .Proto , "6" ), "4" )
122- var dialer net.Dialer
123131 ip := req .IP
124132 if ip == "" {
125133 ip = "127.0.0.1"
@@ -135,9 +143,31 @@ func (d *childDriver) handleConnectRequest(c *net.UnixConn, req *msg.Request) er
135143 }
136144 ip = p .String ()
137145 }
138- targetConn , err := dialer .Dial (dialProto , net .JoinHostPort (ip , strconv .Itoa (req .Port )))
139- if err != nil {
140- return err
146+ targetAddr := net .JoinHostPort (ip , strconv .Itoa (req .Port ))
147+
148+ var targetConn net.Conn
149+ var err error
150+ if d .sourceIPTransparent && req .SourceIP != "" && req .SourcePort != 0 && dialProto == "tcp" && ! net .ParseIP (req .SourceIP ).IsLoopback () {
151+ d .routingSetup .Do (func () { d .routingReady = d .setupTransparentRouting () })
152+ if ! d .routingReady {
153+ d .routingWarn .Do (func () {
154+ fmt .Fprintf (d .logWriter , "source IP transparent: falling back to non-transparent mode, client source IPs will not be preserved\n " )
155+ })
156+ goto fallback
157+ }
158+ targetConn , err = transparentDial (dialProto , targetAddr , req .SourceIP , req .SourcePort )
159+ if err != nil {
160+ fmt .Fprintf (d .logWriter , "transparent dial failed, falling back: %v\n " , err )
161+ targetConn , err = nil , nil
162+ }
163+ }
164+ fallback:
165+ if targetConn == nil {
166+ var dialer net.Dialer
167+ targetConn , err = dialer .Dial (dialProto , targetAddr )
168+ if err != nil {
169+ return err
170+ }
141171 }
142172 defer targetConn .Close () // no effect on duplicated FD
143173 targetConnFiler , ok := targetConn .(filer )
@@ -164,6 +194,83 @@ func (d *childDriver) handleConnectRequest(c *net.UnixConn, req *msg.Request) er
164194 return err
165195}
166196
197+ // setupTransparentRouting sets up policy routing so that response packets
198+ // destined to transparent-bound source IPs are delivered locally.
199+ //
200+ // Transparent sockets (IP_TRANSPARENT) bind to non-local addresses (the real
201+ // client IP). Response packets to these addresses must be routed locally instead
202+ // of being sent out through the TAP device (slirp4netns).
203+ //
204+ // The transparent SYN goes through OUTPUT (where we tag it with CONNMARK) and
205+ // then either:
206+ //
207+ // 1. Gets DNAT'd to the container (nerdctl/CNI): the SYN-ACK arrives via the
208+ // bridge in PREROUTING, where we restore connmark to fwmark.
209+ //
210+ // 2. Goes through loopback to a userspace proxy like docker-proxy: the SYN
211+ // enters PREROUTING on loopback with connmark, which sets fwmark. With
212+ // tcp_fwmark_accept=1, the accepted socket inherits the fwmark. The proxy's
213+ // SYN-ACK is then routed via the fwmark table (local delivery) instead of
214+ // the default route (TAP), allowing it to reach the transparent socket.
215+ func (d * childDriver ) setupTransparentRouting () bool {
216+ // Check that iptables is available before proceeding.
217+ if _ , err := exec .LookPath ("iptables" ); err != nil {
218+ fmt .Fprintf (d .logWriter , "source IP transparent: iptables not found, disabling: %v\n " , err )
219+ return false
220+ }
221+ // Verify the connmark module is usable (kernel module might not be loaded).
222+ if out , err := exec .Command ("iptables" , "-t" , "mangle" , "-L" , "-n" ).CombinedOutput (); err != nil {
223+ fmt .Fprintf (d .logWriter , "source IP transparent: iptables mangle table not available, disabling: %v: %s\n " , err , out )
224+ return false
225+ }
226+ cmds := [][]string {
227+ // Table 100: treat all addresses as local (for delivery to transparent sockets)
228+ {"ip" , "route" , "add" , "local" , "default" , "dev" , "lo" , "table" , "100" },
229+ {"ip" , "-6" , "route" , "add" , "local" , "default" , "dev" , "lo" , "table" , "100" },
230+ // Route fwmark-100 packets via table 100
231+ {"ip" , "rule" , "add" , "fwmark" , "100" , "lookup" , "100" , "priority" , "100" },
232+ {"ip" , "-6" , "rule" , "add" , "fwmark" , "100" , "lookup" , "100" , "priority" , "100" },
233+ // Inherit fwmark from SYN to accepted socket (needed for userspace proxies
234+ // like docker-proxy, so that SYN-ACK routing uses table 100)
235+ {"sysctl" , "-w" , "net.ipv4.tcp_fwmark_accept=1" },
236+ // In OUTPUT: tag transparent connections (non-local source) with CONNMARK
237+ {"iptables" , "-t" , "mangle" , "-A" , "OUTPUT" , "-p" , "tcp" , "-m" , "addrtype" , "!" , "--src-type" , "LOCAL" , "-j" , "CONNMARK" , "--set-mark" , "100" },
238+ {"ip6tables" , "-t" , "mangle" , "-A" , "OUTPUT" , "-p" , "tcp" , "-m" , "addrtype" , "!" , "--src-type" , "LOCAL" , "-j" , "CONNMARK" , "--set-mark" , "100" },
239+ // In PREROUTING: restore connmark to fwmark for routing
240+ {"iptables" , "-t" , "mangle" , "-A" , "PREROUTING" , "-p" , "tcp" , "-m" , "connmark" , "--mark" , "100" , "-j" , "MARK" , "--set-mark" , "100" },
241+ {"ip6tables" , "-t" , "mangle" , "-A" , "PREROUTING" , "-p" , "tcp" , "-m" , "connmark" , "--mark" , "100" , "-j" , "MARK" , "--set-mark" , "100" },
242+ }
243+ for _ , args := range cmds {
244+ if out , err := exec .Command (args [0 ], args [1 :]... ).CombinedOutput (); err != nil {
245+ fmt .Fprintf (d .logWriter , "source IP transparent routing setup: %v: %s\n " , err , out )
246+ }
247+ }
248+ return true
249+ }
250+
251+ // transparentDial dials targetAddr using IP_TRANSPARENT, binding to the given
252+ // source IP and port so the backend service sees the real client address.
253+ func transparentDial (dialProto , targetAddr , sourceIP string , sourcePort int ) (net.Conn , error ) {
254+ dialer := net.Dialer {
255+ Timeout : time .Second ,
256+ LocalAddr : & net.TCPAddr {IP : net .ParseIP (sourceIP ), Port : sourcePort },
257+ Control : func (network , address string , c syscall.RawConn ) error {
258+ var sockErr error
259+ if err := c .Control (func (fd uintptr ) {
260+ if strings .Contains (network , "6" ) {
261+ sockErr = unix .SetsockoptInt (int (fd ), unix .SOL_IPV6 , unix .IPV6_TRANSPARENT , 1 )
262+ } else {
263+ sockErr = unix .SetsockoptInt (int (fd ), unix .SOL_IP , unix .IP_TRANSPARENT , 1 )
264+ }
265+ }); err != nil {
266+ return err
267+ }
268+ return sockErr
269+ },
270+ }
271+ return dialer .Dial (dialProto , targetAddr )
272+ }
273+
167274// filer is implemented by *net.TCPConn and *net.UDPConn
168275type filer interface {
169276 File () (f * os.File , err error )
0 commit comments