diff --git a/charts/csi-cloudscale/templates/daemonset.yaml b/charts/csi-cloudscale/templates/daemonset.yaml index 3695ef82..18a46a48 100644 --- a/charts/csi-cloudscale/templates/daemonset.yaml +++ b/charts/csi-cloudscale/templates/daemonset.yaml @@ -53,6 +53,7 @@ spec: args : - "--endpoint=$(CSI_ENDPOINT)" - "--url=$(CLOUDSCALE_API_URL)" + - "--log-level={{ .Values.node.logLevel }}" {{- with .Values.node.resources }} resources: {{ toYaml . | indent 12 }} diff --git a/charts/csi-cloudscale/templates/statefulset.yaml b/charts/csi-cloudscale/templates/statefulset.yaml index 187c619f..293aad5d 100644 --- a/charts/csi-cloudscale/templates/statefulset.yaml +++ b/charts/csi-cloudscale/templates/statefulset.yaml @@ -76,6 +76,7 @@ spec: args : - "--endpoint=$(CSI_ENDPOINT)" - "--url=$(CLOUDSCALE_API_URL)" + - "--log-level={{ .Values.controller.logLevel }}" {{- with .Values.controller.resources }} resources: {{ toYaml . | indent 12 }} diff --git a/charts/csi-cloudscale/values.yaml b/charts/csi-cloudscale/values.yaml index 8bec898b..be06c75e 100644 --- a/charts/csi-cloudscale/values.yaml +++ b/charts/csi-cloudscale/values.yaml @@ -87,6 +87,7 @@ controller: tag: v3.5.6 pullPolicy: IfNotPresent serviceAccountName: + logLevel: info resources: {} # limits: # cpu: 100m @@ -104,6 +105,7 @@ node: nodeSelector: {} tolerations: [] serviceAccountName: + logLevel: info resources: {} # limits: # cpu: 100m diff --git a/cmd/cloudscale-csi-plugin/main.go b/cmd/cloudscale-csi-plugin/main.go index c869356f..b4a40758 100644 --- a/cmd/cloudscale-csi-plugin/main.go +++ b/cmd/cloudscale-csi-plugin/main.go @@ -24,6 +24,7 @@ import ( "os" "github.com/cloudscale-ch/csi-cloudscale/driver" + "github.com/sirupsen/logrus" ) func main() { @@ -32,6 +33,7 @@ func main() { token = flag.String("token", "", "cloudscale.ch access token") url = flag.String("url", "https://api.cloudscale.ch/", "cloudscale.ch API URL") version = flag.Bool("version", false, "Print the version and exit.") + logLevel = flag.String("log-level", "info", "Log level (trace, debug, info, warn, error, fatal, panic)") ) flag.Parse() @@ -44,7 +46,12 @@ func main() { os.Exit(0) } - drv, err := driver.NewDriver(*endpoint, *token, *url) + level, err := logrus.ParseLevel(*logLevel) + if err != nil { + log.Fatalf("invalid log level %q: %v", *logLevel, err) + } + + drv, err := driver.NewDriver(*endpoint, *token, *url, level) if err != nil { log.Fatalln(err) } diff --git a/driver/driver.go b/driver/driver.go index 4faaf7f9..37b7cfce 100644 --- a/driver/driver.go +++ b/driver/driver.go @@ -48,10 +48,9 @@ var ( // Driver implements the following CSI interfaces: // -// csi.IdentityServer -// csi.ControllerServer -// csi.NodeServer -// +// csi.IdentityServer +// csi.ControllerServer +// csi.NodeServer type Driver struct { endpoint string serverId string @@ -71,7 +70,7 @@ type Driver struct { // NewDriver returns a CSI plugin that contains the necessary gRPC // interfaces to interact with Kubernetes over unix domain sockets for // managaing cloudscale.ch Volumes -func NewDriver(ep, token, urlstr string) (*Driver, error) { +func NewDriver(ep, token, urlstr string, logLevel logrus.Level) (*Driver, error) { tokenSource := oauth2.StaticTokenSource(&oauth2.Token{ AccessToken: token, }) @@ -95,7 +94,9 @@ func NewDriver(ep, token, urlstr string) (*Driver, error) { } cloudscaleClient.BaseURL = baseURL - log := logrus.New().WithFields(logrus.Fields{ + logger := logrus.New() + logger.SetLevel(logLevel) + log := logger.WithFields(logrus.Fields{ "zone": zone, "node_id": serverId, "version": version, @@ -172,7 +173,9 @@ func (d *Driver) Stop() { // When building any packages that import version, pass the build/install cmd // ldflags like so: -// go build -ldflags "-X github.com/cloudscale-ch/csi-cloudscale/driver.version=0.0.1" +// +// go build -ldflags "-X github.com/cloudscale-ch/csi-cloudscale/driver.version=0.0.1" +// // GetVersion returns the current release version, as inserted at build time. func GetVersion() string { return version diff --git a/driver/driver_test.go b/driver/driver_test.go index d798c422..02a62f6b 100644 --- a/driver/driver_test.go +++ b/driver/driver_test.go @@ -20,8 +20,6 @@ package driver import ( "context" "errors" - "github.com/google/uuid" - "k8s.io/mount-utils" "math/rand" "net/http" "net/url" @@ -30,9 +28,12 @@ import ( "testing" "time" - "github.com/cloudscale-ch/cloudscale-go-sdk/v4" + "github.com/google/uuid" "github.com/kubernetes-csi/csi-test/v5/pkg/sanity" "github.com/sirupsen/logrus" + "k8s.io/mount-utils" + + "github.com/cloudscale-ch/cloudscale-go-sdk/v4" ) func init() { @@ -172,9 +173,9 @@ func (f *fakeMounter) HasRequiredSize(log *logrus.Entry, path string, requiredSi return true, nil } -func (f *fakeMounter) FinalizeVolumeAttachmentAndFindPath(logger *logrus.Entry, target string) (*string, error) { +func (f *fakeMounter) FinalizeVolumeAttachmentAndFindPath(logger *logrus.Entry, target string) (string, error) { path := "SomePath" - return &path, nil + return path, nil } type FakeVolumeServiceOperations struct { diff --git a/driver/mounter.go b/driver/mounter.go index 4505e0b6..921a608f 100644 --- a/driver/mounter.go +++ b/driver/mounter.go @@ -18,22 +18,23 @@ limitations under the License. package driver import ( + "context" "encoding/json" "errors" "fmt" - "io/ioutil" - "k8s.io/mount-utils" - kexec "k8s.io/utils/exec" "os" "os/exec" "path/filepath" "strconv" "strings" + "sync" "syscall" "time" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" + "k8s.io/mount-utils" + kexec "k8s.io/utils/exec" ) const ( @@ -86,7 +87,7 @@ type Mounter interface { // Used to find a path in /dev/disk/by-id with a serial that we have from // the cloudscale API. - FinalizeVolumeAttachmentAndFindPath(logger *logrus.Entry, VolumeId string) (*string, error) + FinalizeVolumeAttachmentAndFindPath(logger *logrus.Entry, VolumeId string) (string, error) // GetStatistics returns capacity-related volume statistics for the given // volume path. @@ -221,7 +222,29 @@ func (m *mounter) Mount(source, target, fsType string, luksContext LuksContext, source = luksSource } + // Resolve source symlink for debug logging + resolvedSource, resolveErr := filepath.EvalSymlinks(source) + if resolveErr != nil { + m.log.WithFields(logrus.Fields{ + "source": source, + "target": target, + "fs_type": fsType, + "options": options, + "resolve_error": resolveErr, + }).Debug("Mount: failed to resolve source symlink") + } else { + m.log.WithFields(logrus.Fields{ + "source": source, + "resolved_source": resolvedSource, + "target": target, + "fs_type": fsType, + "options": options, + }).Debug("Mount: resolved source device") + } + m.log.WithFields(logrus.Fields{ + "source": source, + "target": target, "options": options, }).Info("executing mount command") err := m.kMounter.Mount(source, target, fsType, options) @@ -418,7 +441,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -func guessDiskIDPathByVolumeID(volumeID string) *string { +func guessDiskIDPathByVolumeID(volumeID string, logger *logrus.Entry) string { // Get the first part of the UUID. // The linux kernel limits volume serials to 20 bytes: // include/uapi/linux/virtio_blk.h:#define VIRTIO_BLK_ID_BYTES 20 /* ID string length */ @@ -426,64 +449,103 @@ func guessDiskIDPathByVolumeID(volumeID string) *string { globExpr := diskIDPath + "/*" + linuxSerial + "*" matches, _ := filepath.Glob(globExpr) + + logger.WithFields(logrus.Fields{ + "volumeID": volumeID, + "linuxSerial": linuxSerial, + "matches": matches, + }).Debug("guessDiskIDPathByVolumeID") + if len(matches) > 0 { - return &matches[0] + return matches[0] } - return nil + return "" } -func (m *mounter) FinalizeVolumeAttachmentAndFindPath(logger *logrus.Entry, volumeID string) (*string, error) { +func (m *mounter) FinalizeVolumeAttachmentAndFindPath(logger *logrus.Entry, volumeID string) (string, error) { numTries := 0 for { - probeAttachedVolume(logger) - - diskIDPath := guessDiskIDPathByVolumeID(volumeID) - if diskIDPath != nil { + diskIDPath := guessDiskIDPathByVolumeID(volumeID, logger) + if diskIDPath != "" { + // Resolve and log the actual device for debugging + resolved, err := filepath.EvalSymlinks(diskIDPath) + if err != nil { + logger.WithFields(logrus.Fields{ + "disk_id_path": diskIDPath, + "error": err, + }).Debug("FinalizeVolumeAttachmentAndFindPath: found path but failed to resolve symlink") + } else { + logger.WithFields(logrus.Fields{ + "disk_id_path": diskIDPath, + "resolved_device": resolved, + "num_tries": numTries, + }).Debug("FinalizeVolumeAttachmentAndFindPath: found device path") + } return diskIDPath, nil } + logger.WithFields(logrus.Fields{ + "num_tries": numTries, + }).Debug("FinalizeVolumeAttachmentAndFindPath: device not found, probing") + + probeAttachedVolume(logger) + numTries++ - if numTries == 10 { + if numTries == 30 { break } time.Sleep(time.Second) } - return nil, errors.New("Could not attach disk: Timeout after 10s") + return "", errors.New("FinalizeVolumeAttachmentAndFindPath: Timeout after 30s") } -func probeAttachedVolume(logger *logrus.Entry) error { - // rescan scsi bus - scsiHostRescan() +func runCmdWithTimeout(name string, args []string, logger *logrus.Entry, timeout time.Duration) { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() - // udevadm settle waits for udevd to process the device creation - // events for all hardware devices, thus ensuring that any device - // nodes have been created successfully before proceeding. - argsSettle := []string{"settle"} - cmdSettle := exec.Command("udevadm", argsSettle...) - _, errSettle := cmdSettle.CombinedOutput() - if errSettle != nil { - logger.Errorf("error running udevadm settle %v\n", errSettle) + out, err := exec.CommandContext(ctx, name, args...).CombinedOutput() + if err != nil { + logger.WithError(err). + WithFields(logrus.Fields{"out": out, "name": name, "args": args}). + Warn("unable to run cmd " + name) } +} + +var probeLock sync.Mutex + +func probeAttachedVolume(logger *logrus.Entry) { + const triggerTimeout = 15 * time.Second + + // host rescan and udevadm are global actions and if run concurrently, may run into issues with + // symlinking and partial updates. + probeLock.Lock() + defer probeLock.Unlock() + + // rescan scsi bus + logger.Debug("probeAttachedVolume: rescanning SCSI hosts") + scsiHostRescan(logger) - args := []string{"trigger"} - cmd := exec.Command("udevadm", args...) - _, err := cmd.CombinedOutput() + logger.Debug("probeAttachedVolume: running udevadm trigger") + runCmdWithTimeout("udevadm", []string{"trigger"}, logger, triggerTimeout) + + logger.Debug("probeAttachedVolume: running udevadm settle") + runCmdWithTimeout("udevadm", []string{"settle"}, logger, triggerTimeout) + + logger.Debugf("probeAttachedVolume: done") +} + +func scsiHostRescan(logger *logrus.Entry) { + const scsiPath = "/sys/class/scsi_host/" + dirs, err := os.ReadDir(scsiPath) if err != nil { - logger.Errorf("error running udevadm trigger %v\n", err) - return err + logger.WithError(err).Warn("scsiHostRescan: cannot read scsi_host directory") + return } - logger.Debugf("Successfully probed all attachments") - return nil -} -func scsiHostRescan() { - scsiPath := "/sys/class/scsi_host/" - if dirs, err := ioutil.ReadDir(scsiPath); err == nil { - for _, f := range dirs { - name := scsiPath + f.Name() + "/scan" - data := []byte("- - -") - ioutil.WriteFile(name, data, 0666) - } + for _, f := range dirs { + name := scsiPath + f.Name() + "/scan" + data := []byte("- - -") + _ = os.WriteFile(name, data, 0666) } } @@ -494,20 +556,20 @@ func (m *mounter) GetDeviceName(mounter mount.Interface, mountPath string) (stri // FindAbsoluteDeviceByIDPath follows the /dev/disk/by-id symlink to find the absolute path of a device func (m *mounter) FindAbsoluteDeviceByIDPath(volumeName string) (string, error) { - path := guessDiskIDPathByVolumeID(volumeName) - if path == nil { + path := guessDiskIDPathByVolumeID(volumeName, m.log) + if path == "" { return "", fmt.Errorf("could not find device-path for volume: %s", volumeName) } // EvalSymlinks returns relative link if the file is not a symlink // so we do not have to check if it is symlink prior to evaluation - resolved, err := filepath.EvalSymlinks(*path) + resolved, err := filepath.EvalSymlinks(path) if err != nil { - return "", fmt.Errorf("could not resolve symlink %q: %v", *path, err) + return "", fmt.Errorf("could not resolve symlink %q: %v", path, err) } if !strings.HasPrefix(resolved, "/dev") { - return "", fmt.Errorf("resolved symlink %q for %q was unexpected", resolved, *path) + return "", fmt.Errorf("resolved symlink %q for %q was unexpected", resolved, path) } return resolved, nil diff --git a/driver/node.go b/driver/node.go index d0d46c60..9d2fc69b 100644 --- a/driver/node.go +++ b/driver/node.go @@ -27,14 +27,16 @@ package driver import ( "context" + "os" + "path/filepath" + "strconv" + "github.com/container-storage-interface/spec/lib/go/csi" "github.com/sirupsen/logrus" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" "k8s.io/mount-utils" utilexec "k8s.io/utils/exec" - "os" - "strconv" ) const ( @@ -71,11 +73,26 @@ func (d *Driver) NodeStageVolume(ctx context.Context, req *csi.NodeStageVolumeRe // Apparently sometimes we need to call udevadm trigger to get the volume // properly registered in /dev/disk. More information can be found here: // https://github.com/cloudscale-ch/csi-cloudscale/issues/9 - sourcePtr, err := d.mounter.FinalizeVolumeAttachmentAndFindPath(d.log.WithFields(logrus.Fields{"volume_id": req.VolumeId}), req.VolumeId) + source, err := d.mounter.FinalizeVolumeAttachmentAndFindPath(d.log.WithFields(logrus.Fields{"volume_id": req.VolumeId}), req.VolumeId) if err != nil { return nil, err } - source := *sourcePtr + + // Debug logging to help diagnose potential race conditions with concurrent volume mounts + resolvedSource, resolveErr := filepath.EvalSymlinks(source) + if resolveErr != nil { + d.log.WithFields(logrus.Fields{ + "volume_id": req.VolumeId, + "source": source, + "resolve_error": resolveErr, + }).Debug("failed to resolve source symlink") + } else { + d.log.WithFields(logrus.Fields{ + "volume_id": req.VolumeId, + "source_symlink": source, + "resolved_device": resolvedSource, + }).Debug("resolved source device path") + } publishContext := req.GetPublishContext() if publishContext == nil {