diff --git a/config/config.yaml b/config/config.yaml index 9666b09a..af8a0555 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -399,7 +399,9 @@ checks: postage-depth: 21 postage-label: test-label type: feed - + autotls: + timeout: 5m + type: autotls # simulations defines simulations Beekeeper can execute against the cluster # type filed allows defining same simulation with different names and options simulations: diff --git a/config/local.yaml b/config/local.yaml index a29fb7df..4de64953 100644 --- a/config/local.yaml +++ b/config/local.yaml @@ -25,6 +25,38 @@ clusters: config: local count: 2 mode: node + local-dns-autotls: + _inherit: "local" + node-groups: + bootnode: + mode: bootnode + bee-config: bootnode-local-dns-autotls + config: local-dns-autotls + nodes: + - name: bootnode-0 + bootnodes: /dns4/bootnode-0-headless.%s.svc.cluster.local/tcp/1634/p2p/QmaHzvd3iZduu275CMkMVZKwbsjXSyH3GJRj4UvFJApKcb + libp2p-key: '{"address":"28678fe31f09f722d53e77ca2395569f19959fa5","crypto":{"cipher":"aes-128-ctr","ciphertext":"0ff319684c4f8decf9c998047febe3417cfc45832b8bb62fd818183d54cf5d0183bfa021ed95addce3b33e83ce7ee73e926f00eea8241d96b349266a4d299829d3d22db0d536315b52b34db4a6778bfd3ce7631ad7256ea0bb9c50abea9de35d740b6fdc50caf929b1d19494690d9ed649105d02c14f5ec49d","cipherparams":{"iv":"4e9a50fb5852b5e61964f696be78066b"},"kdf":"scrypt","kdfparams":{"n":32768,"r":8,"p":1,"dklen":32,"salt":"4d513e81647e4150bb648ed8d2dda28d460802336bf24d620119eac66ae0c0c4"},"mac":"9ae71db96e5ddc1c214538d42082212bbbe53aeac09fcc3e3a8eff815648331e"},"version":3,"id":"ae3bc991-d89f-405a-9e6a-60e27347e22d"}' + swarm-key: 
'{"address":"f176839c150e52fe30e5c2b5c648465c6fdfa532","crypto":{"cipher":"aes-128-ctr","ciphertext":"352af096f0fca9dfbd20a6861bde43d988efe7f179e0a9ffd812a285fdcd63b9","cipherparams":{"iv":"613003f1f1bf93430c92629da33f8828"},"kdf":"scrypt","kdfparams":{"n":32768,"r":8,"p":1,"dklen":32,"salt":"ad1d99a4c64c95c26131e079e8c8a82221d58bf66a7ceb767c33a4c376c564b8"},"mac":"cafda1bc8ca0ffc2b22eb69afd1cf5072fd09412243443be1b0c6832f57924b6"},"version":3}' + bee: + bee-config: bee-local-dns-autotls + config: local-dns-autotls + count: 3 + mode: node + light: + bee-config: bee-local-light-autotls + config: local-light-autotls + count: 2 + mode: node + wss: + bee-config: bee-local-wss + config: local + count: 2 + mode: node + ultralight: + bee-config: bee-local-ultralight + config: local-ultralight + count: 1 + mode: node local-dns: _inherit: "local" node-groups: @@ -42,11 +74,21 @@ clusters: config: local-dns count: 3 mode: node + wss: + bee-config: bee-local-wss + config: local-dns + count: 2 + mode: node light: bee-config: bee-local-light config: local-light count: 2 mode: node + # ultralight: + # bee-config: bee-local-ultralight + # config: local-ultralight + # count: 2 + # mode: node local-gc: _inherit: "local" node-groups: @@ -100,13 +142,37 @@ node-groups: update-strategy: "RollingUpdate" local-dns: _inherit: "local" + local-dns-autotls: + _inherit: "local" + local-light-autotls: + _inherit: "local-light" local-gc: _inherit: "local" local-light: _inherit: "local" + local-ultralight: + _inherit: "local" + labels: + app.kubernetes.io/component: "node" + app.kubernetes.io/name: "bee" + app.kubernetes.io/part-of: "bee" + app.kubernetes.io/version: "latest" + beekeeper.ethswarm.org/node-funder: "false" # bee-configs defines Bee configuration that can be assigned to node-groups bee-configs: + bootnode-local-dns-autotls: + _inherit: "bee-local-dns-autotls" + bootnode-mode: true + bee-local-dns-autotls: + _inherit: "bee-local" + bootnode: 
/dnsaddr/bootnode-0-headless.local.svc.cluster.local + p2p-wss-enable: true + bee-local-light-autotls: + _inherit: "bee-local" + bootnode: /dnsaddr/bootnode-0-headless.local.svc.cluster.local + full-node: false + p2p-wss-enable: true bee-local: _inherit: "" allow-private-cidrs: true @@ -134,7 +200,7 @@ bee-configs: p2p-addr: ":1634" p2p-wss-addr: ":1635" p2p-ws-enable: false - p2p-wss-enable: true + p2p-wss-enable: false password: "beekeeper" payment-early-percent: 50 payment-threshold: 13500000 @@ -157,6 +223,9 @@ bee-configs: bootnode-local: _inherit: "bee-local" bootnode-mode: true + bee-local-wss: + _inherit: "bee-local" + p2p-wss-enable: true bee-local-dns: _inherit: "bee-local" bootnode: /dnsaddr/localhost @@ -171,6 +240,11 @@ bee-configs: bee-local-gc: _inherit: "bee-local" cache-capacity: 10 + bee-local-ultralight: + _inherit: "bee-local" + blockchain-rpc-endpoint: "" + full-node: false + swap-enable: false bootnode-local-gc: _inherit: "bee-local" cache-capacity: 10 @@ -398,3 +472,9 @@ checks: postage-depth: 21 postage-label: test-label type: feed + ci-autotls: + timeout: 15m + type: autotls + options: + ultralight-group: ultralight + wss-group: wss diff --git a/go.mod b/go.mod index ea704f2f..76f69b8d 100644 --- a/go.mod +++ b/go.mod @@ -16,6 +16,7 @@ require ( github.com/go-git/go-git/v5 v5.13.2 github.com/google/uuid v1.6.0 github.com/gorilla/websocket v1.5.1 + github.com/multiformats/go-multiaddr v0.12.3 github.com/opentracing/opentracing-go v1.2.0 github.com/prometheus/client_golang v1.21.1 github.com/prometheus/common v0.62.0 @@ -94,7 +95,6 @@ require ( github.com/mr-tron/base58 v1.2.0 // indirect github.com/multiformats/go-base32 v0.1.0 // indirect github.com/multiformats/go-base36 v0.2.0 // indirect - github.com/multiformats/go-multiaddr v0.12.3 // indirect github.com/multiformats/go-multiaddr-dns v0.3.1 // indirect github.com/multiformats/go-multibase v0.2.0 // indirect github.com/multiformats/go-multicodec v0.9.0 // indirect diff --git 
a/pkg/bee/api/node.go b/pkg/bee/api/node.go
index de9423bc..789eeccc 100644
--- a/pkg/bee/api/node.go
+++ b/pkg/bee/api/node.go
@@ -122,6 +122,24 @@ func (n *NodeService) Peers(ctx context.Context) (resp Peers, err error) {
 	return resp, err
 }
 
+// ConnectResponse represents the response from the connect endpoint.
+type ConnectResponse struct {
+	Address string `json:"address"`
+}
+
+// Connect sends a POST request asking the node to connect to the peer at the given multiaddress.
+// The multiaddr is appended verbatim to the /connect path, so it must start with "/" (e.g. /ip4/x.x.x.x/tcp/port/...).
+// Returns the decoded response, whose Address field holds the overlay address of the connected peer.
+func (n *NodeService) Connect(ctx context.Context, multiaddr string) (resp ConnectResponse, err error) {
+	err = n.client.requestJSON(ctx, http.MethodPost, "/connect"+multiaddr, nil, &resp)
+	return resp, err
+}
+
+// Disconnect sends a DELETE request on /peers/{overlay} for the peer with the given overlay address.
+func (n *NodeService) Disconnect(ctx context.Context, overlay swarm.Address) error {
+	return n.client.requestJSON(ctx, http.MethodDelete, "/peers/"+overlay.String(), nil, nil)
+}
+
 // Readiness represents node's readiness
 type Readiness struct {
 	Status string `json:"status"`
diff --git a/pkg/bee/client.go b/pkg/bee/client.go
index d1404c50..66cbf4d2 100644
--- a/pkg/bee/client.go
+++ b/pkg/bee/client.go
@@ -318,6 +318,30 @@ func (c *Client) Peers(ctx context.Context) (peers []swarm.Address, err error) {
 	return peers, err
 }
 
+// Connect connects to a peer using the provided multiaddress.
+// Returns the overlay address of the connected peer.
+func (c *Client) Connect(ctx context.Context, multiaddr string) (swarm.Address, error) {
+	resp, err := c.api.Node.Connect(ctx, multiaddr)
+	if err != nil {
+		return swarm.ZeroAddress, fmt.Errorf("connect to %s: %w", multiaddr, err)
+	}
+
+	addr, err := swarm.ParseHexAddress(resp.Address)
+	if err != nil {
+		return swarm.ZeroAddress, fmt.Errorf("parse overlay address %s: %w", resp.Address, err)
+	}
+
+	return addr, nil
+}
+
+// Disconnect disconnects from a peer with the given overlay address.
+func (c *Client) Disconnect(ctx context.Context, overlay swarm.Address) error {
+	if err := c.api.Node.Disconnect(ctx, overlay); err != nil {
+		return fmt.Errorf("disconnect from %s: %w", overlay, err)
+	}
+	return nil
+}
+
 // PinRootHash pins root hash of given reference.
 func (c *Client) PinRootHash(ctx context.Context, ref swarm.Address) error {
 	return c.api.Pinning.PinRootHash(ctx, ref)
diff --git a/pkg/check/autotls/autotls.go b/pkg/check/autotls/autotls.go
new file mode 100644
index 00000000..f808141a
--- /dev/null
+++ b/pkg/check/autotls/autotls.go
@@ -0,0 +1,343 @@
+// Package autotls implements the Beekeeper check that verifies Bee nodes
+// advertise WSS (TLS + WebSocket) underlays and accept connections on them.
+package autotls
+
+import (
+	"context"
+	"fmt"
+	"sort"
+	"time"
+
+	"github.com/ethersphere/beekeeper/pkg/bee"
+	"github.com/ethersphere/beekeeper/pkg/beekeeper"
+	"github.com/ethersphere/beekeeper/pkg/logging"
+	"github.com/ethersphere/beekeeper/pkg/orchestration"
+	ma "github.com/multiformats/go-multiaddr"
+)
+
+const (
+	// defaultConnectTimeout bounds a single connect request.
+	defaultConnectTimeout = 30 * time.Second
+	// startupDelay is the pause between listing the node clients and
+	// inspecting the WSS group.
+	startupDelay = 5 * time.Second
+	// underlayPollDelay is the pause after reading each node's addresses.
+	underlayPollDelay = 2 * time.Second
+	// disconnectSettleDelay is the pause after every disconnect request.
+	disconnectSettleDelay = 500 * time.Millisecond
+	// renewalWaitTime is how long the check waits before re-testing
+	// connectivity. This is configured in beelocal setup (we set certificate
+	// to expire in 300 seconds).
+	// NOTE(review): the check's own timeout has to exceed this wait, but
+	// config/config.yaml sets 5m for the "autotls" check - confirm.
+	renewalWaitTime = 500 * time.Second
+)
+
+// Options holds the settings of the AutoTLS check.
+type Options struct {
+	// WSSGroup is the node group whose nodes must expose WSS underlays.
+	WSSGroup string
+	// UltraLightGroup is the node group used as connection source in the
+	// ultra-light test; an empty value skips that test.
+	UltraLightGroup string
+	// ConnectTimeout bounds every single connect request.
+	ConnectTimeout time.Duration
+}
+
+// NewDefaultOptions returns the default options of the AutoTLS check.
+func NewDefaultOptions() Options {
+	return Options{
+		WSSGroup:        "wss",
+		UltraLightGroup: "ultralight",
+		ConnectTimeout:  defaultConnectTimeout,
+	}
+}
+
+var _ beekeeper.Action = (*Check)(nil)
+
+// Check is the AutoTLS check; it implements beekeeper.Action.
+type Check struct {
+	logger logging.Logger
+}
+
+// NewCheck returns a new AutoTLS check that logs to logger.
+func NewCheck(logger logging.Logger) beekeeper.Action {
+	return &Check{
+		logger: logger,
+	}
+}
+
+// Run executes the check against cluster. opts must be an Options value.
+// It verifies that the nodes of the WSS group advertise WSS underlays, then
+// connects to them from a non-WSS node, a WSS node and (optionally) every
+// ultra-light node, and finally repeats the WSS test after renewalWaitTime.
+func (c *Check) Run(ctx context.Context, cluster orchestration.Cluster, opts any) error {
+	o, ok := opts.(Options)
+	if !ok {
+		return fmt.Errorf("invalid options type")
+	}
+	if o.ConnectTimeout <= 0 {
+		// A non-positive timeout would expire every connect context at once.
+		o.ConnectTimeout = defaultConnectTimeout
+	}
+
+	c.logger.Info("starting AutoTLS check")
+
+	clients, err := cluster.NodesClients(ctx)
+	if err != nil {
+		return fmt.Errorf("get node clients: %w", err)
+	}
+	if err := sleepContext(ctx, startupDelay); err != nil {
+		return err
+	}
+
+	wssClients := orchestration.ClientMap(clients).FilterByNodeGroups([]string{o.WSSGroup})
+	if len(wssClients) == 0 {
+		return fmt.Errorf("no nodes found in WSS group %q", o.WSSGroup)
+	}
+
+	c.logger.Infof("found %d nodes in WSS group %q", len(wssClients), o.WSSGroup)
+
+	wssNodes, err := c.verifyWSSUnderlays(ctx, wssClients, o.UltraLightGroup)
+	if err != nil {
+		return fmt.Errorf("verify WSS underlays: %w", err)
+	}
+
+	if err := c.testWSSConnectivity(ctx, clients, wssNodes, o.ConnectTimeout); err != nil {
+		return fmt.Errorf("WSS connectivity test: %w", err)
+	}
+
+	if o.UltraLightGroup != "" {
+		if err := c.testUltraLightConnectivity(ctx, clients, wssNodes, o.UltraLightGroup, o.ConnectTimeout); err != nil {
+			return fmt.Errorf("ultra-light connectivity test: %w", err)
+		}
+	}
+
+	if err := c.testCertificateRenewal(ctx, clients, wssNodes, o.ConnectTimeout); err != nil {
+		return fmt.Errorf("certificate renewal test: %w", err)
+	}
+
+	c.logger.Info("AutoTLS check completed successfully")
+	return nil
+}
+
+// verifyWSSUnderlays collects the WSS underlays of every client in wssClients,
+// keyed by node name. Clients whose node group equals excludeNodeGroup are
+// skipped. It fails when a remaining node advertises no WSS underlay.
+func (c *Check) verifyWSSUnderlays(ctx context.Context, wssClients orchestration.ClientList, excludeNodeGroup string) (map[string][]string, error) {
+	wssNodes := make(map[string][]string, len(wssClients))
+
+	for _, client := range wssClients {
+		if excludeNodeGroup != "" && client.NodeGroup() == excludeNodeGroup {
+			c.logger.Debugf("skipping %s (node group %s has no WSS underlays)", client.Name(), excludeNodeGroup)
+			continue
+		}
+
+		nodeName := client.Name()
+		addresses, err := client.Addresses(ctx)
+		if err != nil {
+			return nil, fmt.Errorf("%s: get addresses: %w", nodeName, err)
+		}
+		if err := sleepContext(ctx, underlayPollDelay); err != nil {
+			return nil, err
+		}
+
+		wssUnderlays := filterWSSUnderlays(addresses.Underlay)
+		if len(wssUnderlays) == 0 {
+			return nil, fmt.Errorf("node %s in WSS group has no WSS underlay addresses", nodeName)
+		}
+
+		wssNodes[nodeName] = wssUnderlays
+		c.logger.Debugf("node %s has %d WSS underlay(s)", nodeName, len(wssUnderlays))
+	}
+
+	return wssNodes, nil
+}
+
+// filterWSSUnderlays returns the underlays that parse as multiaddresses and
+// contain both the TLS and the WS protocol; everything else is dropped.
+func filterWSSUnderlays(underlays []string) []string {
+	var wss []string
+	for _, u := range underlays {
+		maddr, err := ma.NewMultiaddr(u)
+		if err != nil {
+			continue
+		}
+		if _, err := maddr.ValueForProtocol(ma.P_TLS); err != nil {
+			continue
+		}
+		if _, err := maddr.ValueForProtocol(ma.P_WS); err != nil {
+			continue
+		}
+		wss = append(wss, u)
+	}
+	return wss
+}
+
+// testWSSConnectivity connects to the WSS nodes once from a node without WSS
+// underlays and once from a WSS node. Sources are picked in name order so
+// repeated runs exercise the same pairs.
+func (c *Check) testWSSConnectivity(ctx context.Context, clients map[string]*bee.Client, wssNodes map[string][]string, timeout time.Duration) error {
+	var nonWSSSource, wssSource *bee.Client
+	var nonWSSName, wssSourceName string
+
+	for _, name := range sortedKeys(clients) {
+		_, hasWSS := wssNodes[name]
+		switch {
+		case hasWSS && wssSource == nil:
+			wssSource, wssSourceName = clients[name], name
+		case !hasWSS && nonWSSSource == nil:
+			nonWSSSource, nonWSSName = clients[name], name
+		}
+	}
+
+	if nonWSSSource != nil {
+		c.logger.Infof("testing cross-protocol: %s (non-WSS) to WSS nodes", nonWSSName)
+		if err := c.testConnectivity(ctx, nonWSSSource, nonWSSName, clients, wssNodes, timeout); err != nil {
+			return fmt.Errorf("cross-protocol test: %w", err)
+		}
+	} else {
+		c.logger.Warning("no non-WSS nodes available, skipping cross-protocol test")
+	}
+
+	if wssSource != nil {
+		c.logger.Infof("testing WSS-to-WSS: %s to WSS nodes", wssSourceName)
+		if err := c.testConnectivity(ctx, wssSource, wssSourceName, clients, wssNodes, timeout); err != nil {
+			return fmt.Errorf("WSS-to-WSS test: %w", err)
+		}
+	} else {
+		c.logger.Warning("no WSS source nodes available, skipping WSS-to-WSS test")
+	}
+
+	return nil
+}
+
+// testUltraLightConnectivity connects from every node of ultraLightGroup to
+// the WSS nodes. It only logs a warning and returns nil when the group has
+// no nodes.
+func (c *Check) testUltraLightConnectivity(ctx context.Context, clients map[string]*bee.Client, wssNodes map[string][]string, ultraLightGroup string, timeout time.Duration) error {
+	ultralightClients := orchestration.ClientMap(clients).FilterByNodeGroups([]string{ultraLightGroup})
+	if len(ultralightClients) == 0 {
+		c.logger.Warningf("no nodes found in ultra-light group %q, skipping ultra-light connectivity test", ultraLightGroup)
+		return nil
+	}
+
+	c.logger.Infof("found %d nodes in ultra-light group %q", len(ultralightClients), ultraLightGroup)
+
+	for _, client := range ultralightClients {
+		nodeName := client.Name()
+		c.logger.Infof("testing ultra-light to WSS: %s (no listen addr) to WSS nodes", nodeName)
+		if err := c.testConnectivity(ctx, client, nodeName, clients, wssNodes, timeout); err != nil {
+			return fmt.Errorf("ultra-light %s to WSS test: %w", nodeName, err)
+		}
+	}
+
+	return nil
+}
+
+// testConnectivity connects sourceClient to every underlay of every node in
+// wssNodes except the source itself, visiting targets in name order. Each
+// connect is bounded by timeout and must report the target's overlay
+// address; the peer is disconnected again after every attempt.
+func (c *Check) testConnectivity(ctx context.Context, sourceClient *bee.Client, sourceName string, clients map[string]*bee.Client, wssNodes map[string][]string, timeout time.Duration) error {
+	for _, targetName := range sortedKeys(wssNodes) {
+		if targetName == sourceName {
+			continue
+		}
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+
+		targetClient, ok := clients[targetName]
+		if !ok || targetClient == nil {
+			return fmt.Errorf("no client for target %s", targetName)
+		}
+		targetAddresses, err := targetClient.Addresses(ctx)
+		if err != nil {
+			return fmt.Errorf("get target %s addresses: %w", targetName, err)
+		}
+		targetOverlay := targetAddresses.Overlay
+
+		// Disconnect first to ensure we test actual WSS connection.
+		// Bee returns 200 OK for both new connections and existing ones,
+		// so we must disconnect first to guarantee WSS transport is used.
+		c.logger.Infof("disconnecting from %s before WSS test", targetName)
+		if err := sourceClient.Disconnect(ctx, targetOverlay); err != nil {
+			c.logger.Warningf("failed to disconnect from %s: %v", targetName, err)
+		}
+		if err := sleepContext(ctx, disconnectSettleDelay); err != nil {
+			return err
+		}
+
+		for _, underlay := range wssNodes[targetName] {
+			c.logger.Infof("testing WSS connection from %s to %s via %s", sourceName, targetName, underlay)
+
+			connectCtx, cancel := context.WithTimeout(ctx, timeout)
+			start := time.Now()
+			overlay, err := sourceClient.Connect(connectCtx, underlay)
+			duration := time.Since(start)
+			cancel()
+			if err != nil {
+				return fmt.Errorf("WSS connection failed from %s to %s via %s: %w", sourceName, targetName, underlay, err)
+			}
+			if !overlay.Equal(targetOverlay) {
+				return fmt.Errorf("overlay mismatch: expected %s, got %s", targetOverlay, overlay)
+			}
+
+			c.logger.Infof("WSS connection successful: %s to %s (overlay: %s, duration: %v)", sourceName, targetName, overlay, duration)
+
+			if err := sourceClient.Disconnect(ctx, overlay); err != nil {
+				c.logger.Warningf("failed to disconnect from %s: %v", targetName, err)
+			}
+			if err := sleepContext(ctx, disconnectSettleDelay); err != nil {
+				return err
+			}
+		}
+	}
+
+	return nil
+}
+
+// testCertificateRenewal waits renewalWaitTime (or until ctx is done) and
+// then repeats the WSS connectivity test, which is expected to keep passing
+// once the certificates have been renewed.
+func (c *Check) testCertificateRenewal(ctx context.Context, clients map[string]*bee.Client, wssNodes map[string][]string, connectTimeout time.Duration) error {
+	c.logger.Infof("testing certificate renewal: waiting %v then re-testing connectivity", renewalWaitTime)
+
+	if err := sleepContext(ctx, renewalWaitTime); err != nil {
+		return err
+	}
+
+	c.logger.Info("wait complete, re-testing WSS connectivity to verify certificates were renewed")
+
+	if err := c.testWSSConnectivity(ctx, clients, wssNodes, connectTimeout); err != nil {
+		return fmt.Errorf("post-renewal connectivity test failed (certificates may not have been renewed): %w", err)
+	}
+
+	c.logger.Info("certificate renewal test passed: WSS connectivity still works after wait period")
+	return nil
+}
+
+// sleepContext blocks for d or until ctx is done, whichever comes first.
+// It returns ctx.Err() when the context ends before the delay has elapsed.
+func sleepContext(ctx context.Context, d time.Duration) error {
+	timer := time.NewTimer(d)
+	defer timer.Stop()
+
+	select {
+	case <-ctx.Done():
+		return ctx.Err()
+	case <-timer.C:
+		return nil
+	}
+}
+
+// sortedKeys returns the keys of m in ascending order.
+func sortedKeys[V any](m map[string]V) []string {
+	keys := make([]string, 0, len(m))
+	for k := range m {
+		keys = append(keys, k)
+	}
+	sort.Strings(keys)
+	return keys
+}
diff --git a/pkg/config/check.go b/pkg/config/check.go
index b130c6e4..5543d172 100644
--- a/pkg/config/check.go
+++ b/pkg/config/check.go
@@ -8,6 +8,7 @@ import (
 
 	"github.com/ethersphere/beekeeper/pkg/beekeeper"
 	"github.com/ethersphere/beekeeper/pkg/check/act"
+	"github.com/ethersphere/beekeeper/pkg/check/autotls"
 	"github.com/ethersphere/beekeeper/pkg/check/balances"
 	"github.com/ethersphere/beekeeper/pkg/check/cashout"
 	"github.com/ethersphere/beekeeper/pkg/check/datadurability"
@@ -82,6 +83,25 @@ var Checks = map[string]CheckType{
 			return opts, nil
 		},
 	},
+	"autotls": {
+		NewAction: autotls.NewCheck,
+		NewOptions: func(checkGlobalConfig CheckGlobalConfig, check Check) (any, error) {
+			checkOpts := new(struct {
+				WSSGroup        *string        `yaml:"wss-group"`
+				UltraLightGroup *string        `yaml:"ultralight-group"`
+				ConnectTimeout  *time.Duration `yaml:"connect-timeout"`
+			})
+			if err := check.Options.Decode(checkOpts); err != nil {
+				return nil, fmt.Errorf("decoding check %s options: %w", check.Type, err)
+			}
+			opts := autotls.NewDefaultOptions()
+
+			if err := applyCheckConfig(checkGlobalConfig, checkOpts, &opts); err != nil {
+				return nil, fmt.Errorf("applying options: %w", err)
+			}
+			return opts, nil
+		},
+	},
 	"balances": {
 		NewAction: balances.NewCheck,
 		NewOptions: func(checkGlobalConfig CheckGlobalConfig, check Check) (any, error) {