diff --git a/vgpu-manager/rhel8/nvidia-driver b/vgpu-manager/rhel8/nvidia-driver index 26d8a9f5..0cdb8523 100755 --- a/vgpu-manager/rhel8/nvidia-driver +++ b/vgpu-manager/rhel8/nvidia-driver @@ -7,6 +7,8 @@ DRIVER_VERSION=${DRIVER_VERSION:?"Missing driver version"} DRIVER_RESET_RETRIES=10 DELAY_BEFORE_VF_CREATION=${DELAY_BEFORE_VF_CREATION:-15} RUN_DIR=/run/nvidia +NVIDIA_MODULE_PARAMS=() +MODPROBE_CONFIG_DIR="/etc/modprobe.d" # Mount the driver rootfs into the run directory with the exception of sysfs. _mount_rootfs() { @@ -52,14 +54,59 @@ _set_fw_search_path() { echo -n "$nv_fw_search_path" > $fw_path_config_file } +# For each kernel module configuration file mounted into the container, +# parse the file contents and extract the custom module parameters that +# are to be passed as input to 'modprobe'. +# +# Assumptions: +# - Configuration file is named nvidia.conf +# - Configuration file is mounted inside the container at /drivers. +# - Each line in the file contains at least one parameter, where parameters on the same line +# are space delimited. It is up to the user to properly format the file to ensure +# the correct set of parameters are passed to 'modprobe'. +_get_module_params() { + local base_path="/drivers" + # nvidia + if [ -f "${base_path}/nvidia.conf" ]; then + while IFS="" read -r param || [ -n "$param" ]; do + NVIDIA_MODULE_PARAMS+=("$param") + done <"${base_path}/nvidia.conf" + echo "Module parameters provided for nvidia: ${NVIDIA_MODULE_PARAMS[@]}" + fi +} + _install_driver() { local tmp_dir=$(mktemp -d) sh NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}-vgpu-kvm.run --ui=none --no-questions --tmpdir ${tmp_dir} --no-systemd } -# Currently _install_driver() takes care of loading nvidia modules. Just need to start necessary vgpu daemons +_create_module_params_conf() { + echo "Parsing kernel module parameters..." + _get_module_params + + if [ ${#NVIDIA_MODULE_PARAMS[@]} -gt 0 ]; then + echo "Configuring nvidia module parameters in ${MODPROBE_CONFIG_DIR}/nvidia.conf" + echo "options nvidia ${NVIDIA_MODULE_PARAMS[@]}" > ${MODPROBE_CONFIG_DIR}/nvidia.conf + fi +} + +# Load NVIDIA driver kernel modules with custom parameters and start vGPU daemons _load_driver() { + # Unload modules if they're already loaded so we can reload with custom parameters + if [ -f /sys/module/nvidia_vgpu_vfio/refcnt ] || [ -f /sys/module/nvidia/refcnt ]; then + echo "NVIDIA modules already loaded by installer, unloading to apply custom parameters..." + rmmod nvidia_vgpu_vfio 2>/dev/null || true + rmmod nvidia 2>/dev/null || true + fi + + echo "Loading NVIDIA driver kernel modules..." + set -o xtrace +o nounset + modprobe nvidia + modprobe nvidia_vgpu_vfio + set +o xtrace -o nounset + + # Start vGPU daemons /usr/bin/nvidia-vgpud /usr/bin/nvidia-vgpu-mgr & @@ -181,6 +228,7 @@ init() { _unmount_rootfs _create_dev_char_directory _set_fw_search_path + _create_module_params_conf _install_driver _load_driver || exit 1 _mount_rootfs diff --git a/vgpu-manager/rhel9/nvidia-driver b/vgpu-manager/rhel9/nvidia-driver index 4285dddc..5ab5c860 100755 --- a/vgpu-manager/rhel9/nvidia-driver +++ b/vgpu-manager/rhel9/nvidia-driver @@ -19,6 +19,8 @@ DRIVER_VERSION=${DRIVER_VERSION:?"Missing driver version"} DRIVER_RESET_RETRIES=10 DELAY_BEFORE_VF_CREATION=${DELAY_BEFORE_VF_CREATION:-15} RUN_DIR=/run/nvidia +NVIDIA_MODULE_PARAMS=() +MODPROBE_CONFIG_DIR="/etc/modprobe.d" # Mount the driver rootfs into the run directory with the exception of sysfs. _mount_rootfs() { @@ -64,14 +66,59 @@ _set_fw_search_path() { echo -n "$nv_fw_search_path" > $fw_path_config_file } +# For each kernel module configuration file mounted into the container, +# parse the file contents and extract the custom module parameters that +# are to be passed as input to 'modprobe'. +# +# Assumptions: +# - Configuration file is named nvidia.conf +# - Configuration file is mounted inside the container at /drivers. +# - Each line in the file contains at least one parameter, where parameters on the same line +# are space delimited. It is up to the user to properly format the file to ensure +# the correct set of parameters are passed to 'modprobe'. +_get_module_params() { + local base_path="/drivers" + # nvidia + if [ -f "${base_path}/nvidia.conf" ]; then + while IFS="" read -r param || [ -n "$param" ]; do + NVIDIA_MODULE_PARAMS+=("$param") + done <"${base_path}/nvidia.conf" + echo "Module parameters provided for nvidia: ${NVIDIA_MODULE_PARAMS[@]}" + fi +} + _install_driver() { local tmp_dir=$(mktemp -d) sh NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}-vgpu-kvm.run --ui=none --no-questions --tmpdir ${tmp_dir} --no-systemd } -# Currently _install_driver() takes care of loading nvidia modules. Just need to start necessary vgpu daemons +_create_module_params_conf() { + echo "Parsing kernel module parameters..." + _get_module_params + + if [ ${#NVIDIA_MODULE_PARAMS[@]} -gt 0 ]; then + echo "Configuring nvidia module parameters in ${MODPROBE_CONFIG_DIR}/nvidia.conf" + echo "options nvidia ${NVIDIA_MODULE_PARAMS[@]}" > ${MODPROBE_CONFIG_DIR}/nvidia.conf + fi +} + +# Load NVIDIA driver kernel modules with custom parameters and start vGPU daemons _load_driver() { + # Unload modules if they're already loaded so we can reload with custom parameters + if [ -f /sys/module/nvidia_vgpu_vfio/refcnt ] || [ -f /sys/module/nvidia/refcnt ]; then + echo "NVIDIA modules already loaded by installer, unloading to apply custom parameters..." + rmmod nvidia_vgpu_vfio 2>/dev/null || true + rmmod nvidia 2>/dev/null || true + fi + + echo "Loading NVIDIA driver kernel modules..." + set -o xtrace +o nounset + modprobe nvidia + modprobe nvidia_vgpu_vfio + set +o xtrace -o nounset + + # Start vGPU daemons /usr/bin/nvidia-vgpud /usr/bin/nvidia-vgpu-mgr & @@ -193,6 +240,7 @@ init() { _unmount_rootfs _create_dev_char_directory _set_fw_search_path + _create_module_params_conf _install_driver _load_driver || exit 1 _mount_rootfs diff --git a/vgpu-manager/ubuntu22.04/nvidia-driver b/vgpu-manager/ubuntu22.04/nvidia-driver index 6f13038f..0de743a9 100755 --- a/vgpu-manager/ubuntu22.04/nvidia-driver +++ b/vgpu-manager/ubuntu22.04/nvidia-driver @@ -8,6 +8,8 @@ DRIVER_RESET_RETRIES=10 DELAY_BEFORE_VF_CREATION=${DELAY_BEFORE_VF_CREATION:-15} KERNEL_VERSION=$(uname -r) RUN_DIR=/run/nvidia +NVIDIA_MODULE_PARAMS=() +MODPROBE_CONFIG_DIR="/etc/modprobe.d" export DEBIAN_FRONTEND=noninteractive @@ -133,8 +135,53 @@ _set_fw_search_path() { echo -n "$nv_fw_search_path" > $fw_path_config_file } -# Currently _install_driver() takes care of loading nvidia modules. Just need to start necessary vgpu daemons +# For each kernel module configuration file mounted into the container, +# parse the file contents and extract the custom module parameters that +# are to be passed as input to 'modprobe'. +# +# Assumptions: +# - Configuration file is named nvidia.conf +# - Configuration file is mounted inside the container at /drivers. +# - Each line in the file contains at least one parameter, where parameters on the same line +# are space delimited. It is up to the user to properly format the file to ensure +# the correct set of parameters are passed to 'modprobe'. +_get_module_params() { + local base_path="/drivers" + # nvidia + if [ -f "${base_path}/nvidia.conf" ]; then + while IFS="" read -r param || [ -n "$param" ]; do + NVIDIA_MODULE_PARAMS+=("$param") + done <"${base_path}/nvidia.conf" + echo "Module parameters provided for nvidia: ${NVIDIA_MODULE_PARAMS[@]}" + fi +} + +_create_module_params_conf() { + echo "Parsing kernel module parameters..." + _get_module_params + + if [ ${#NVIDIA_MODULE_PARAMS[@]} -gt 0 ]; then + echo "Configuring nvidia module parameters in ${MODPROBE_CONFIG_DIR}/nvidia.conf" + echo "options nvidia ${NVIDIA_MODULE_PARAMS[@]}" > ${MODPROBE_CONFIG_DIR}/nvidia.conf + fi +} + +# Load NVIDIA driver kernel modules with custom parameters and start vGPU daemons _load_driver() { + # Unload modules if they're already loaded so we can reload with custom parameters + if [ -f /sys/module/nvidia_vgpu_vfio/refcnt ] || [ -f /sys/module/nvidia/refcnt ]; then + echo "NVIDIA modules already loaded by installer, unloading to apply custom parameters..." + rmmod nvidia_vgpu_vfio 2>/dev/null || true + rmmod nvidia 2>/dev/null || true + fi + + echo "Loading NVIDIA driver kernel modules..." + set -o xtrace +o nounset + modprobe nvidia + modprobe nvidia_vgpu_vfio + set +o xtrace -o nounset + + # Start vGPU daemons /usr/bin/nvidia-vgpud /usr/bin/nvidia-vgpu-mgr & @@ -260,6 +307,7 @@ init() { _install_prerequisites _create_dev_char_directory _set_fw_search_path + _create_module_params_conf _install_driver _load_driver || exit 1 _mount_rootfs diff --git a/vgpu-manager/ubuntu24.04/nvidia-driver b/vgpu-manager/ubuntu24.04/nvidia-driver index bce3d1ca..f7b440bd 100644 --- a/vgpu-manager/ubuntu24.04/nvidia-driver +++ b/vgpu-manager/ubuntu24.04/nvidia-driver @@ -8,6 +8,8 @@ DRIVER_RESET_RETRIES=10 DELAY_BEFORE_VF_CREATION=${DELAY_BEFORE_VF_CREATION:-15} KERNEL_VERSION=$(uname -r) RUN_DIR=/run/nvidia +NVIDIA_MODULE_PARAMS=() +MODPROBE_CONFIG_DIR="/etc/modprobe.d" export DEBIAN_FRONTEND=noninteractive @@ -133,8 +135,53 @@ _install_driver() { sh NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}-vgpu-kvm.run --ui=none --no-questions --tmpdir ${tmp_dir} --no-systemd } -# Currently _install_driver() takes care of loading nvidia modules. Just need to start necessary vgpu daemons +# For each kernel module configuration file mounted into the container, +# parse the file contents and extract the custom module parameters that +# are to be passed as input to 'modprobe'. +# +# Assumptions: +# - Configuration file is named nvidia.conf +# - Configuration file is mounted inside the container at /drivers. +# - Each line in the file contains at least one parameter, where parameters on the same line +# are space delimited. It is up to the user to properly format the file to ensure +# the correct set of parameters are passed to 'modprobe'. +_get_module_params() { + local base_path="/drivers" + # nvidia + if [ -f "${base_path}/nvidia.conf" ]; then + while IFS="" read -r param || [ -n "$param" ]; do + NVIDIA_MODULE_PARAMS+=("$param") + done <"${base_path}/nvidia.conf" + echo "Module parameters provided for nvidia: ${NVIDIA_MODULE_PARAMS[@]}" + fi +} + +_create_module_params_conf() { + echo "Parsing kernel module parameters..." + _get_module_params + + if [ ${#NVIDIA_MODULE_PARAMS[@]} -gt 0 ]; then + echo "Configuring nvidia module parameters in ${MODPROBE_CONFIG_DIR}/nvidia.conf" + echo "options nvidia ${NVIDIA_MODULE_PARAMS[@]}" > ${MODPROBE_CONFIG_DIR}/nvidia.conf + fi +} + +# Load NVIDIA driver kernel modules with custom parameters and start vGPU daemons _load_driver() { + # Unload modules if they're already loaded so we can reload with custom parameters + if [ -f /sys/module/nvidia_vgpu_vfio/refcnt ] || [ -f /sys/module/nvidia/refcnt ]; then + echo "NVIDIA modules already loaded by installer, unloading to apply custom parameters..." + rmmod nvidia_vgpu_vfio 2>/dev/null || true + rmmod nvidia 2>/dev/null || true + fi + + echo "Loading NVIDIA driver kernel modules..." + set -o xtrace +o nounset + modprobe nvidia + modprobe nvidia_vgpu_vfio + set +o xtrace -o nounset + + # Start vGPU daemons /usr/bin/nvidia-vgpud /usr/bin/nvidia-vgpu-mgr & @@ -260,6 +307,7 @@ init() { _install_prerequisites _create_dev_char_directory _set_fw_search_path + _create_module_params_conf _install_driver _load_driver || exit 1 _mount_rootfs