From 6f526069da5d94cb5cdc403e8d42705f2ac73d49 Mon Sep 17 00:00:00 2001 From: Alexander Eichhorn Date: Fri, 6 Mar 2026 15:55:13 +0100 Subject: [PATCH 1/7] feat: add per-model FP8 layerwise casting for VRAM reduction Add fp8_storage option to model default settings that enables diffusers' enable_layerwise_casting() to store weights in FP8 (float8_e4m3fn) while casting to fp16/bf16 during inference. This reduces VRAM usage by ~50% per model with minimal quality loss. Supported: SD1/SD2/SDXL/SD3, Flux, Flux2, CogView4, Z-Image, VAE (diffusers-based), ControlNet, T2IAdapter. Not applicable: Text Encoders, LoRA, GGUF, BnB, custom classes --- .../model_manager/configs/controlnet.py | 4 ++ .../backend/model_manager/configs/main.py | 4 ++ .../model_manager/load/load_default.py | 46 +++++++++++++++++++ .../load/model_loaders/cogview4.py | 1 + .../load/model_loaders/controlnet.py | 4 +- .../model_manager/load/model_loaders/flux.py | 4 ++ .../load/model_loaders/generic_diffusers.py | 1 + .../load/model_loaders/stable_diffusion.py | 6 ++- .../model_manager/load/model_loaders/vae.py | 4 +- .../load/model_loaders/z_image.py | 1 + 10 files changed, 72 insertions(+), 3 deletions(-) diff --git a/invokeai/backend/model_manager/configs/controlnet.py b/invokeai/backend/model_manager/configs/controlnet.py index 6688ec95ffd..1c73df41209 100644 --- a/invokeai/backend/model_manager/configs/controlnet.py +++ b/invokeai/backend/model_manager/configs/controlnet.py @@ -54,6 +54,10 @@ class ControlAdapterDefaultSettings(BaseModel): # This could be narrowed to controlnet processor nodes, but they change. Leaving this a string is safer. preprocessor: str | None + fp8_storage: bool | None = Field( + default=None, + description="Store weights in FP8 to reduce VRAM usage (~50% savings). Weights are cast to compute dtype during inference.", + ) model_config = ConfigDict(extra="forbid") @classmethod diff --git a/invokeai/backend/model_manager/configs/main.py b/invokeai/backend/model_manager/configs/main.py index 6f737ceb92d..b8f107de127 100644 --- a/invokeai/backend/model_manager/configs/main.py +++ b/invokeai/backend/model_manager/configs/main.py @@ -51,6 +51,10 @@ class MainModelDefaultSettings(BaseModel): height: int | None = Field(default=None, multiple_of=8, ge=64, description="Default height for this model") guidance: float | None = Field(default=None, ge=1, description="Default Guidance for this model") cpu_only: bool | None = Field(default=None, description="Whether this model should run on CPU only") + fp8_storage: bool | None = Field( + default=None, + description="Store weights in FP8 to reduce VRAM usage (~50% savings). Weights are cast to compute dtype during inference.", + ) model_config = ConfigDict(extra="forbid") diff --git a/invokeai/backend/model_manager/load/load_default.py b/invokeai/backend/model_manager/load/load_default.py index ea699207348..20134a99ddb 100644 --- a/invokeai/backend/model_manager/load/load_default.py +++ b/invokeai/backend/model_manager/load/load_default.py @@ -124,6 +124,52 @@ def get_size_fs( variant=config.repo_variant if isinstance(config, Diffusers_Config_Base) else None, ) + def _should_use_fp8(self, config: AnyModelConfig, submodel_type: Optional[SubModelType] = None) -> bool: + """Check if FP8 layerwise casting should be applied to a model.""" + # FP8 storage only works on CUDA + if self._torch_device.type != "cuda": + return False + + # Don't apply FP8 to text encoders, tokenizers, schedulers, etc. 
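+        # Tokenizers and schedulers hold no castable weights; text encoders are
+        # intentionally out of scope for FP8 storage (see commit message).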
+ _excluded_submodel_types = { + SubModelType.TextEncoder, + SubModelType.TextEncoder2, + SubModelType.TextEncoder3, + SubModelType.Tokenizer, + SubModelType.Tokenizer2, + SubModelType.Tokenizer3, + SubModelType.Scheduler, + SubModelType.SafetyChecker, + } + if submodel_type in _excluded_submodel_types: + return False + + # Check default_settings.fp8_storage (Main models, ControlNet) + if hasattr(config, "default_settings") and config.default_settings is not None: + if hasattr(config.default_settings, "fp8_storage") and config.default_settings.fp8_storage is True: + return True + + return False + + def _apply_fp8_layerwise_casting( + self, model: AnyModel, config: AnyModelConfig, submodel_type: Optional[SubModelType] = None + ) -> AnyModel: + """Apply FP8 layerwise casting to a model if enabled in its config.""" + if not self._should_use_fp8(config, submodel_type): + return model + + from diffusers.models.modeling_utils import ModelMixin + + if not isinstance(model, ModelMixin): + return model + + model.enable_layerwise_casting( + storage_dtype=torch.float8_e4m3fn, + compute_dtype=self._torch_dtype, + ) + self._logger.info(f"FP8 layerwise casting enabled for {config.name} (storage=float8_e4m3fn, compute={self._torch_dtype})") + return model + # This needs to be implemented in the subclass def _load_model( self, diff --git a/invokeai/backend/model_manager/load/model_loaders/cogview4.py b/invokeai/backend/model_manager/load/model_loaders/cogview4.py index ee8c6d4f41d..6e8490912bc 100644 --- a/invokeai/backend/model_manager/load/model_loaders/cogview4.py +++ b/invokeai/backend/model_manager/load/model_loaders/cogview4.py @@ -55,4 +55,5 @@ def _load_model( else: raise e + result = self._apply_fp8_layerwise_casting(result, config, submodel_type) return result diff --git a/invokeai/backend/model_manager/load/model_loaders/controlnet.py b/invokeai/backend/model_manager/load/model_loaders/controlnet.py index 8fd1796b8f5..e50e45849ab 100644 --- a/invokeai/backend/model_manager/load/model_loaders/controlnet.py +++ b/invokeai/backend/model_manager/load/model_loaders/controlnet.py @@ -45,9 +45,11 @@ def _load_model( submodel_type: Optional[SubModelType] = None, ) -> AnyModel: if isinstance(config, ControlNet_Checkpoint_Config_Base): - return ControlNetModel.from_single_file( + result = ControlNetModel.from_single_file( config.path, torch_dtype=self._torch_dtype, ) + result = self._apply_fp8_layerwise_casting(result, config, submodel_type) + return result else: return super()._load_model(config, submodel_type) diff --git a/invokeai/backend/model_manager/load/model_loaders/flux.py b/invokeai/backend/model_manager/load/model_loaders/flux.py index 2de51a8acae..08e15279e03 100644 --- a/invokeai/backend/model_manager/load/model_loaders/flux.py +++ b/invokeai/backend/model_manager/load/model_loaders/flux.py @@ -139,6 +139,7 @@ def _load_model( local_files_only=True, ) + model = self._apply_fp8_layerwise_casting(model, config, submodel_type) return model @@ -201,6 +202,7 @@ def _load_model( vae_dtype = self._torch_dtype model.to(vae_dtype) + model = self._apply_fp8_layerwise_casting(model, config, submodel_type) return model def _convert_flux2_vae_bfl_to_diffusers(self, sd: dict) -> dict: @@ -639,6 +641,7 @@ def _load_model( else: raise e + result = self._apply_fp8_layerwise_casting(result, config, submodel_type) return result @@ -715,6 +718,7 @@ def _load_model( if guidance_emb.linear_2.bias is not None: guidance_emb.linear_2.bias.data.zero_() + result = self._apply_fp8_layerwise_casting(result, config, 
submodel_type) return result diff --git a/invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py b/invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py index 2a79f604ba2..7e87869c9e3 100644 --- a/invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py +++ b/invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py @@ -47,6 +47,7 @@ def _load_model( result = model_class.from_pretrained(model_path, torch_dtype=self._torch_dtype, local_files_only=True) else: raise e + result = self._apply_fp8_layerwise_casting(result, config, submodel_type) return result # TO DO: Add exception handling diff --git a/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py b/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py index 0e11cd4191d..d19d6477626 100644 --- a/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py +++ b/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py @@ -90,6 +90,7 @@ def _load_model( else: raise e + result = self._apply_fp8_layerwise_casting(result, config, submodel_type) return result def _load_from_singlefile( @@ -152,5 +153,8 @@ def _load_from_singlefile( if subtype == submodel_type: continue if submodel := getattr(pipeline, subtype.value, None): + self._apply_fp8_layerwise_casting(submodel, config, subtype) self._ram_cache.put(get_model_cache_key(config.key, subtype), model=submodel) - return getattr(pipeline, submodel_type.value) + result = getattr(pipeline, submodel_type.value) + result = self._apply_fp8_layerwise_casting(result, config, submodel_type) + return result diff --git a/invokeai/backend/model_manager/load/model_loaders/vae.py b/invokeai/backend/model_manager/load/model_loaders/vae.py index e91903ccdad..077e688a813 100644 --- a/invokeai/backend/model_manager/load/model_loaders/vae.py +++ b/invokeai/backend/model_manager/load/model_loaders/vae.py @@ -29,9 +29,11 @@ def _load_model( submodel_type: Optional[SubModelType] = None, ) -> AnyModel: if isinstance(config, VAE_Checkpoint_Config_Base): - return AutoencoderKL.from_single_file( + result = AutoencoderKL.from_single_file( config.path, torch_dtype=self._torch_dtype, ) + result = self._apply_fp8_layerwise_casting(result, config, submodel_type) + return result else: return super()._load_model(config, submodel_type) diff --git a/invokeai/backend/model_manager/load/model_loaders/z_image.py b/invokeai/backend/model_manager/load/model_loaders/z_image.py index c381e02718d..6c2102933af 100644 --- a/invokeai/backend/model_manager/load/model_loaders/z_image.py +++ b/invokeai/backend/model_manager/load/model_loaders/z_image.py @@ -163,6 +163,7 @@ def _load_model( else: raise e + result = self._apply_fp8_layerwise_casting(result, config, submodel_type) return result From bf3bd2e66f6c4d186c81a5f6d8c4dff0d2a66d40 Mon Sep 17 00:00:00 2001 From: Alexander Eichhorn Date: Fri, 6 Mar 2026 16:46:28 +0100 Subject: [PATCH 2/7] feat: add FP8 storage option to Model Manager UI Add per-model FP8 storage toggle in Model Manager default settings for both main models and control adapter models. When enabled, model weights are stored in FP8 format in VRAM (~50% savings) and cast layer-by-layer to compute precision during inference via diffusers' enable_layerwise_casting(). Backend: add fp8_storage field to MainModelDefaultSettings and ControlAdapterDefaultSettings, apply FP8 layerwise casting in all relevant model loaders (SD, SDXL, FLUX, CogView4, Z-Image, ControlNet, T2IAdapter, VAE). 
Gracefully skips non-ModelMixin models (custom checkpoint loaders, GGUF, BnB). Frontend: add FP8 Storage switch to model default settings panels with InformationalPopover, translation keys, and proper form handling. --- invokeai/frontend/web/public/locales/en.json | 8 +++ .../InformationalPopover/constants.ts | 3 +- .../useControlAdapterModelDefaultSettings.ts | 4 ++ .../hooks/useMainModelDefaultSettings.ts | 4 ++ .../ControlAdapterModelDefaultSettings.tsx | 4 ++ .../DefaultFp8StorageControlAdapter.tsx | 52 +++++++++++++++++++ .../DefaultFp8Storage.tsx | 50 ++++++++++++++++++ .../MainModelDefaultSettings.tsx | 4 ++ .../frontend/web/src/services/api/schema.ts | 10 ++++ 9 files changed, 138 insertions(+), 1 deletion(-) create mode 100644 invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ControlAdapterModelDefaultSettings/DefaultFp8StorageControlAdapter.tsx create mode 100644 invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/MainModelDefaultSettings/DefaultFp8Storage.tsx diff --git a/invokeai/frontend/web/public/locales/en.json b/invokeai/frontend/web/public/locales/en.json index 2db971d06a6..56d3f68d0e9 100644 --- a/invokeai/frontend/web/public/locales/en.json +++ b/invokeai/frontend/web/public/locales/en.json @@ -974,6 +974,7 @@ "convertToDiffusersHelpText5": "Please make sure you have enough disk space. Models generally vary between 2GB-7GB in size.", "convertToDiffusersHelpText6": "Do you wish to convert this model?", "cpuOnly": "CPU Only", + "fp8Storage": "FP8 Storage (Save VRAM)", "runOnCpu": "Run text encoder model on CPU only", "noDefaultSettings": "No default settings configured for this model. Visit the Model Manager to add default settings.", "defaultSettings": "Default Settings", @@ -2139,6 +2140,13 @@ "When enabled, only the text encoder component will run on CPU instead of GPU.", "This saves VRAM for the denoiser while only slightly impacting performance. The conditioning outputs are automatically moved to GPU for the denoiser." ] + }, + "fp8Storage": { + "heading": "FP8 Storage", + "paragraphs": [ + "Stores model weights in FP8 format in VRAM, reducing memory usage by approximately 50% compared to FP16.", + "During inference, weights are cast layer-by-layer to the compute precision (FP16/BF16), so image quality is preserved. Works on all CUDA GPUs." 
+ ] } }, "workflows": { diff --git a/invokeai/frontend/web/src/common/components/InformationalPopover/constants.ts b/invokeai/frontend/web/src/common/components/InformationalPopover/constants.ts index 95fa75cfa32..e9d855648ad 100644 --- a/invokeai/frontend/web/src/common/components/InformationalPopover/constants.ts +++ b/invokeai/frontend/web/src/common/components/InformationalPopover/constants.ts @@ -77,7 +77,8 @@ export type Feature = | 'tileOverlap' | 'optimizedDenoising' | 'fluxDevLicense' - | 'cpuOnly'; + | 'cpuOnly' + | 'fp8Storage'; export type PopoverData = PopoverProps & { image?: string; diff --git a/invokeai/frontend/web/src/features/modelManagerV2/hooks/useControlAdapterModelDefaultSettings.ts b/invokeai/frontend/web/src/features/modelManagerV2/hooks/useControlAdapterModelDefaultSettings.ts index 1f14c08dedc..9ab49ca889f 100644 --- a/invokeai/frontend/web/src/features/modelManagerV2/hooks/useControlAdapterModelDefaultSettings.ts +++ b/invokeai/frontend/web/src/features/modelManagerV2/hooks/useControlAdapterModelDefaultSettings.ts @@ -11,6 +11,10 @@ export const useControlAdapterModelDefaultSettings = ( isEnabled: !isNil(modelConfig?.default_settings?.preprocessor), value: modelConfig?.default_settings?.preprocessor || 'none', }, + fp8Storage: { + isEnabled: !isNil(modelConfig?.default_settings?.fp8_storage), + value: modelConfig?.default_settings?.fp8_storage ?? false, + }, }; }, [modelConfig?.default_settings]); diff --git a/invokeai/frontend/web/src/features/modelManagerV2/hooks/useMainModelDefaultSettings.ts b/invokeai/frontend/web/src/features/modelManagerV2/hooks/useMainModelDefaultSettings.ts index dfab2d251f9..10b66b7a25e 100644 --- a/invokeai/frontend/web/src/features/modelManagerV2/hooks/useMainModelDefaultSettings.ts +++ b/invokeai/frontend/web/src/features/modelManagerV2/hooks/useMainModelDefaultSettings.ts @@ -41,6 +41,10 @@ export const useMainModelDefaultSettings = (modelConfig: MainModelConfig) => { isEnabled: !isNil(modelConfig?.default_settings?.guidance), value: modelConfig?.default_settings?.guidance ?? 4, }, + fp8Storage: { + isEnabled: !isNil(modelConfig?.default_settings?.fp8_storage), + value: modelConfig?.default_settings?.fp8_storage ?? 
false, + }, }; }, [modelConfig]); diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ControlAdapterModelDefaultSettings/ControlAdapterModelDefaultSettings.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ControlAdapterModelDefaultSettings/ControlAdapterModelDefaultSettings.tsx index a5e8f10a4bc..aad70846ffd 100644 --- a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ControlAdapterModelDefaultSettings/ControlAdapterModelDefaultSettings.tsx +++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ControlAdapterModelDefaultSettings/ControlAdapterModelDefaultSettings.tsx @@ -1,6 +1,7 @@ import { Button, Flex, Heading, SimpleGrid } from '@invoke-ai/ui-library'; import { useControlAdapterModelDefaultSettings } from 'features/modelManagerV2/hooks/useControlAdapterModelDefaultSettings'; import { useIsModelManagerEnabled } from 'features/modelManagerV2/hooks/useIsModelManagerEnabled'; +import { DefaultFp8StorageControlAdapter } from 'features/modelManagerV2/subpanels/ModelPanel/ControlAdapterModelDefaultSettings/DefaultFp8StorageControlAdapter'; import { DefaultPreprocessor } from 'features/modelManagerV2/subpanels/ModelPanel/ControlAdapterModelDefaultSettings/DefaultPreprocessor'; import type { FormField } from 'features/modelManagerV2/subpanels/ModelPanel/MainModelDefaultSettings/MainModelDefaultSettings'; import { toast } from 'features/toast/toast'; @@ -14,6 +15,7 @@ import type { ControlLoRAModelConfig, ControlNetModelConfig, T2IAdapterModelConf export type ControlAdapterModelDefaultSettingsFormData = { preprocessor: FormField; + fp8Storage: FormField; }; type Props = { @@ -40,6 +42,7 @@ export const ControlAdapterModelDefaultSettings = memo(({ modelConfig }: Props) (data) => { const body = { preprocessor: data.preprocessor.isEnabled ? data.preprocessor.value : null, + fp8_storage: data.fp8Storage.isEnabled ? 
data.fp8Storage.value : null, }; updateModel({ @@ -88,6 +91,7 @@ export const ControlAdapterModelDefaultSettings = memo(({ modelConfig }: Props) + ); diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ControlAdapterModelDefaultSettings/DefaultFp8StorageControlAdapter.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ControlAdapterModelDefaultSettings/DefaultFp8StorageControlAdapter.tsx new file mode 100644 index 00000000000..bfaf6e4983a --- /dev/null +++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ControlAdapterModelDefaultSettings/DefaultFp8StorageControlAdapter.tsx @@ -0,0 +1,52 @@ +import { Flex, FormControl, FormLabel, Switch } from '@invoke-ai/ui-library'; +import { InformationalPopover } from 'common/components/InformationalPopover/InformationalPopover'; +import { SettingToggle } from 'features/modelManagerV2/subpanels/ModelPanel/SettingToggle'; +import type { ChangeEvent } from 'react'; +import { memo, useCallback, useMemo } from 'react'; +import type { UseControllerProps } from 'react-hook-form'; +import { useController } from 'react-hook-form'; +import { useTranslation } from 'react-i18next'; + +import type { ControlAdapterModelDefaultSettingsFormData } from './ControlAdapterModelDefaultSettings'; + +type DefaultFp8StorageType = ControlAdapterModelDefaultSettingsFormData['fp8Storage']; + +export const DefaultFp8StorageControlAdapter = memo( + (props: UseControllerProps) => { + const { t } = useTranslation(); + const { field } = useController(props); + + const onChange = useCallback( + (e: ChangeEvent) => { + const updatedValue = { + ...(field.value as DefaultFp8StorageType), + value: e.target.checked, + }; + field.onChange(updatedValue); + }, + [field] + ); + + const value = useMemo(() => { + return (field.value as DefaultFp8StorageType).value; + }, [field.value]); + + const isDisabled = useMemo(() => { + return !(field.value as DefaultFp8StorageType).isEnabled; + }, [field.value]); + + return ( + + + + {t('modelManager.fp8Storage')} + + + + + + ); + } +); + +DefaultFp8StorageControlAdapter.displayName = 'DefaultFp8StorageControlAdapter'; diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/MainModelDefaultSettings/DefaultFp8Storage.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/MainModelDefaultSettings/DefaultFp8Storage.tsx new file mode 100644 index 00000000000..6fa3a86da5c --- /dev/null +++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/MainModelDefaultSettings/DefaultFp8Storage.tsx @@ -0,0 +1,50 @@ +import { Flex, FormControl, FormLabel, Switch } from '@invoke-ai/ui-library'; +import { InformationalPopover } from 'common/components/InformationalPopover/InformationalPopover'; +import { SettingToggle } from 'features/modelManagerV2/subpanels/ModelPanel/SettingToggle'; +import type { ChangeEvent } from 'react'; +import { memo, useCallback, useMemo } from 'react'; +import type { UseControllerProps } from 'react-hook-form'; +import { useController } from 'react-hook-form'; +import { useTranslation } from 'react-i18next'; + +import type { MainModelDefaultSettingsFormData } from './MainModelDefaultSettings'; + +type DefaultFp8StorageType = MainModelDefaultSettingsFormData['fp8Storage']; + +export const DefaultFp8Storage = memo((props: UseControllerProps) => { + const { t } = useTranslation(); + const { field } = useController(props); + + const onChange = useCallback( + (e: ChangeEvent) => { + const updatedValue 
= { + ...(field.value as DefaultFp8StorageType), + value: e.target.checked, + }; + field.onChange(updatedValue); + }, + [field] + ); + + const value = useMemo(() => { + return (field.value as DefaultFp8StorageType).value; + }, [field.value]); + + const isDisabled = useMemo(() => { + return !(field.value as DefaultFp8StorageType).isEnabled; + }, [field.value]); + + return ( + + + + {t('modelManager.fp8Storage')} + + + + + + ); +}); + +DefaultFp8Storage.displayName = 'DefaultFp8Storage'; diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/MainModelDefaultSettings/MainModelDefaultSettings.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/MainModelDefaultSettings/MainModelDefaultSettings.tsx index dd944897b24..91d255f5a49 100644 --- a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/MainModelDefaultSettings/MainModelDefaultSettings.tsx +++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/MainModelDefaultSettings/MainModelDefaultSettings.tsx @@ -18,6 +18,7 @@ import type { MainModelConfig } from 'services/api/types'; import { DefaultCfgRescaleMultiplier } from './DefaultCfgRescaleMultiplier'; import { DefaultCfgScale } from './DefaultCfgScale'; +import { DefaultFp8Storage } from './DefaultFp8Storage'; import { DefaultGuidance } from './DefaultGuidance'; import { DefaultScheduler } from './DefaultScheduler'; import { DefaultSteps } from './DefaultSteps'; @@ -39,6 +40,7 @@ export type MainModelDefaultSettingsFormData = { width: FormField; height: FormField; guidance: FormField; + fp8Storage: FormField; }; type Props = { @@ -85,6 +87,7 @@ export const MainModelDefaultSettings = memo(({ modelConfig }: Props) => { width: data.width.isEnabled ? data.width.value : null, height: data.height.isEnabled ? data.height.value : null, guidance: data.guidance.isEnabled ? data.guidance.value : null, + fp8_storage: data.fp8Storage.isEnabled ? data.fp8Storage.value : null, }; updateModel({ @@ -141,6 +144,7 @@ export const MainModelDefaultSettings = memo(({ modelConfig }: Props) => { {!isFluxFamily && } + ); diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts index b605413787b..aa60743b0b4 100644 --- a/invokeai/frontend/web/src/services/api/schema.ts +++ b/invokeai/frontend/web/src/services/api/schema.ts @@ -5225,6 +5225,11 @@ export type components = { ControlAdapterDefaultSettings: { /** Preprocessor */ preprocessor: string | null; + /** + * Fp8 Storage + * @description Store weights in FP8 to reduce VRAM usage (~50% savings). Weights are cast to compute dtype during inference. + */ + fp8_storage?: boolean | null; }; /** ControlField */ ControlField: { @@ -17168,6 +17173,11 @@ export type components = { * @description Whether this model should run on CPU only */ cpu_only?: boolean | null; + /** + * Fp8 Storage + * @description Store weights in FP8 to reduce VRAM usage (~50% savings). Weights are cast to compute dtype during inference. 
+ */ + fp8_storage?: boolean | null; }; /** * Main Model - SD1.5, SD2 From afe246e3ee33bc0afd6a9e8320392d34a53e4dd9 Mon Sep 17 00:00:00 2001 From: Alexander Eichhorn Date: Fri, 6 Mar 2026 17:16:34 +0100 Subject: [PATCH 3/7] ruff format --- invokeai/backend/model_manager/load/load_default.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/invokeai/backend/model_manager/load/load_default.py b/invokeai/backend/model_manager/load/load_default.py index 20134a99ddb..c07166065e1 100644 --- a/invokeai/backend/model_manager/load/load_default.py +++ b/invokeai/backend/model_manager/load/load_default.py @@ -167,7 +167,9 @@ def _apply_fp8_layerwise_casting( storage_dtype=torch.float8_e4m3fn, compute_dtype=self._torch_dtype, ) - self._logger.info(f"FP8 layerwise casting enabled for {config.name} (storage=float8_e4m3fn, compute={self._torch_dtype})") + self._logger.info( + f"FP8 layerwise casting enabled for {config.name} (storage=float8_e4m3fn, compute={self._torch_dtype})" + ) return model # This needs to be implemented in the subclass From 0d7b39fa11df2abf9f84f191c366bcc657788874 Mon Sep 17 00:00:00 2001 From: Alexander Eichhorn Date: Sat, 21 Mar 2026 04:09:31 +0100 Subject: [PATCH 4/7] fix: enable FP8 layerwise casting for checkpoint Flux models FluxCheckpointModel and Flux2CheckpointModel were missing the _apply_fp8_layerwise_casting call. Additionally, the FP8 casting only worked for diffusers ModelMixin models. Add manual layerwise casting via forward hooks for plain nn.Module (custom Flux class). Also simplify FP8 UI toggle from dual-slider to single switch, matching the CPU-only toggle pattern per review feedback on #8945. --- .../model_manager/load/load_default.py | 58 +++++++++++++++++-- .../model_manager/load/model_loaders/flux.py | 8 ++- .../DefaultFp8StorageControlAdapter.tsx | 21 +++---- .../DefaultFp8Storage.tsx | 21 +++---- 4 files changed, 72 insertions(+), 36 deletions(-) diff --git a/invokeai/backend/model_manager/load/load_default.py b/invokeai/backend/model_manager/load/load_default.py index c07166065e1..244863b008d 100644 --- a/invokeai/backend/model_manager/load/load_default.py +++ b/invokeai/backend/model_manager/load/load_default.py @@ -158,20 +158,66 @@ def _apply_fp8_layerwise_casting( if not self._should_use_fp8(config, submodel_type): return model + storage_dtype = torch.float8_e4m3fn + compute_dtype = self._torch_dtype + from diffusers.models.modeling_utils import ModelMixin - if not isinstance(model, ModelMixin): + if isinstance(model, ModelMixin): + model.enable_layerwise_casting( + storage_dtype=storage_dtype, + compute_dtype=compute_dtype, + ) + elif isinstance(model, torch.nn.Module): + # Detect the model's current dtype to use as compute dtype, since custom models + # (e.g. Flux checkpoint) may require a specific dtype (bf16) that differs from + # the global torch dtype (fp16). 
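+            # Assumption: all of the model's parameters share a single dtype, so the
+            # first parameter is taken as representative of the whole model.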
+ first_param = next(model.parameters(), None) + if first_param is not None: + compute_dtype = first_param.dtype + self._apply_fp8_to_nn_module(model, storage_dtype=storage_dtype, compute_dtype=compute_dtype) + else: return model - model.enable_layerwise_casting( - storage_dtype=torch.float8_e4m3fn, - compute_dtype=self._torch_dtype, - ) + param_bytes = sum(p.nelement() * p.element_size() for p in model.parameters()) self._logger.info( - f"FP8 layerwise casting enabled for {config.name} (storage=float8_e4m3fn, compute={self._torch_dtype})" + f"FP8 layerwise casting enabled for {config.name} " + f"(storage=float8_e4m3fn, compute={compute_dtype}, " + f"param_size={param_bytes / (1024**2):.0f}MB)" ) return model + @staticmethod + def _apply_fp8_to_nn_module(model: torch.nn.Module, storage_dtype: torch.dtype, compute_dtype: torch.dtype) -> None: + """Apply FP8 layerwise casting to a plain nn.Module using forward hooks.""" + for module in model.modules(): + params = list(module.parameters(recurse=False)) + if not params: + continue + + # Convert this module's own parameters to FP8 storage dtype + for param in params: + param.data = param.data.to(storage_dtype) + + # Pre-hook: cast to compute dtype before forward + def _make_pre_hook(dt: torch.dtype): + def hook(mod: torch.nn.Module, _args: object) -> None: + for p in mod.parameters(recurse=False): + p.data = p.data.to(dt) + + return hook + + # Post-hook: cast back to storage dtype after forward + def _make_post_hook(dt: torch.dtype): + def hook(mod: torch.nn.Module, _args: object, _output: object) -> None: + for p in mod.parameters(recurse=False): + p.data = p.data.to(dt) + + return hook + + module.register_forward_pre_hook(_make_pre_hook(compute_dtype)) + module.register_forward_hook(_make_post_hook(storage_dtype)) + # This needs to be implemented in the subclass def _load_model( self, diff --git a/invokeai/backend/model_manager/load/model_loaders/flux.py b/invokeai/backend/model_manager/load/model_loaders/flux.py index 08e15279e03..33ab7bc0ce9 100644 --- a/invokeai/backend/model_manager/load/model_loaders/flux.py +++ b/invokeai/backend/model_manager/load/model_loaders/flux.py @@ -487,7 +487,9 @@ def _load_model( match submodel_type: case SubModelType.Transformer: - return self._load_from_singlefile(config) + model = self._load_from_singlefile(config) + model = self._apply_fp8_layerwise_casting(model, config, submodel_type) + return model raise ValueError( f"Only Transformer submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}" @@ -736,7 +738,9 @@ def _load_model( match submodel_type: case SubModelType.Transformer: - return self._load_from_singlefile(config) + model = self._load_from_singlefile(config) + model = self._apply_fp8_layerwise_casting(model, config, submodel_type) + return model raise ValueError( f"Only Transformer submodels are currently supported. 
Received: {submodel_type.value if submodel_type else 'None'}" diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ControlAdapterModelDefaultSettings/DefaultFp8StorageControlAdapter.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ControlAdapterModelDefaultSettings/DefaultFp8StorageControlAdapter.tsx index bfaf6e4983a..812d081c356 100644 --- a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ControlAdapterModelDefaultSettings/DefaultFp8StorageControlAdapter.tsx +++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ControlAdapterModelDefaultSettings/DefaultFp8StorageControlAdapter.tsx @@ -1,6 +1,5 @@ -import { Flex, FormControl, FormLabel, Switch } from '@invoke-ai/ui-library'; +import { FormControl, FormLabel, Switch } from '@invoke-ai/ui-library'; import { InformationalPopover } from 'common/components/InformationalPopover/InformationalPopover'; -import { SettingToggle } from 'features/modelManagerV2/subpanels/ModelPanel/SettingToggle'; import type { ChangeEvent } from 'react'; import { memo, useCallback, useMemo } from 'react'; import type { UseControllerProps } from 'react-hook-form'; @@ -21,6 +20,7 @@ export const DefaultFp8StorageControlAdapter = memo( const updatedValue = { ...(field.value as DefaultFp8StorageType), value: e.target.checked, + isEnabled: e.target.checked, }; field.onChange(updatedValue); }, @@ -31,19 +31,12 @@ export const DefaultFp8StorageControlAdapter = memo( return (field.value as DefaultFp8StorageType).value; }, [field.value]); - const isDisabled = useMemo(() => { - return !(field.value as DefaultFp8StorageType).isEnabled; - }, [field.value]); - return ( - - - - {t('modelManager.fp8Storage')} - - - - + + + {t('modelManager.fp8Storage')} + + ); } diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/MainModelDefaultSettings/DefaultFp8Storage.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/MainModelDefaultSettings/DefaultFp8Storage.tsx index 6fa3a86da5c..d860846766e 100644 --- a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/MainModelDefaultSettings/DefaultFp8Storage.tsx +++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/MainModelDefaultSettings/DefaultFp8Storage.tsx @@ -1,6 +1,5 @@ -import { Flex, FormControl, FormLabel, Switch } from '@invoke-ai/ui-library'; +import { FormControl, FormLabel, Switch } from '@invoke-ai/ui-library'; import { InformationalPopover } from 'common/components/InformationalPopover/InformationalPopover'; -import { SettingToggle } from 'features/modelManagerV2/subpanels/ModelPanel/SettingToggle'; import type { ChangeEvent } from 'react'; import { memo, useCallback, useMemo } from 'react'; import type { UseControllerProps } from 'react-hook-form'; @@ -20,6 +19,7 @@ export const DefaultFp8Storage = memo((props: UseControllerProps { - return !(field.value as DefaultFp8StorageType).isEnabled; - }, [field.value]); - return ( - - - - {t('modelManager.fp8Storage')} - - - - + + + {t('modelManager.fp8Storage')} + + ); }); From a0df643ccac5e8bc18b09ee1293ee1b5578269e2 Mon Sep 17 00:00:00 2001 From: Alexander Eichhorn Date: Sat, 21 Mar 2026 04:48:52 +0100 Subject: [PATCH 5/7] fix: exclude Z-Image from FP8 due to diffusers layerwise casting bug Z-Image's transformer has dtype mismatches with diffusers' enable_layerwise_casting: skipped modules (t_embedder, cap_embedder) stay in bf16 while hooked modules cast to fp16, causing crashes in 
attention layers. Also hide the FP8 toggle in the UI for Z-Image models. --- invokeai/backend/model_manager/load/load_default.py | 7 +++++++ .../backend/model_manager/load/model_loaders/z_image.py | 2 ++ .../MainModelDefaultSettings/MainModelDefaultSettings.tsx | 6 +++++- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/invokeai/backend/model_manager/load/load_default.py b/invokeai/backend/model_manager/load/load_default.py index 244863b008d..c20ed372695 100644 --- a/invokeai/backend/model_manager/load/load_default.py +++ b/invokeai/backend/model_manager/load/load_default.py @@ -130,6 +130,13 @@ def _should_use_fp8(self, config: AnyModelConfig, submodel_type: Optional[SubMod if self._torch_device.type != "cuda": return False + # Z-Image has dtype mismatch issues with diffusers' layerwise casting + # (skipped modules produce bf16, hooked modules expect fp16). + from invokeai.backend.model_manager.taxonomy import BaseModelType + + if hasattr(config, "base") and config.base == BaseModelType.ZImage: + return False + # Don't apply FP8 to text encoders, tokenizers, schedulers, etc. _excluded_submodel_types = { SubModelType.TextEncoder, diff --git a/invokeai/backend/model_manager/load/model_loaders/z_image.py b/invokeai/backend/model_manager/load/model_loaders/z_image.py index 6c2102933af..626defabf69 100644 --- a/invokeai/backend/model_manager/load/model_loaders/z_image.py +++ b/invokeai/backend/model_manager/load/model_loaders/z_image.py @@ -288,6 +288,8 @@ def _load_from_singlefile( sd[k] = sd[k].to(model_dtype) model.load_state_dict(sd, assign=True) + + model = self._apply_fp8_layerwise_casting(model, config, submodel_type) return model diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/MainModelDefaultSettings/MainModelDefaultSettings.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/MainModelDefaultSettings/MainModelDefaultSettings.tsx index 91d255f5a49..b69bf117bc6 100644 --- a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/MainModelDefaultSettings/MainModelDefaultSettings.tsx +++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/MainModelDefaultSettings/MainModelDefaultSettings.tsx @@ -56,6 +56,10 @@ export const MainModelDefaultSettings = memo(({ modelConfig }: Props) => { return ['flux', 'flux2'].includes(modelConfig.base); }, [modelConfig]); + const isZImage = useMemo(() => { + return modelConfig.base === 'z-image'; + }, [modelConfig]); + const defaultSettingsDefaults = useMainModelDefaultSettings(modelConfig); const optimalDimension = useMemo(() => { const modelBase = modelConfig?.base; @@ -144,7 +148,7 @@ export const MainModelDefaultSettings = memo(({ modelConfig }: Props) => { {!isFluxFamily && } - + {!isZImage && } ); From 06ad3c7e93bf9c9d76f909c3ef36f14c8f0f03dd Mon Sep 17 00:00:00 2001 From: Alexander Eichhorn Date: Sat, 21 Mar 2026 06:32:14 +0100 Subject: [PATCH 6/7] fix: detect model dtype for FP8 compute instead of using global dtype Models like Flux are loaded in bf16 but the global torch dtype is fp16, causing dtype mismatches during FP8 layerwise casting. Detect the model's actual parameter dtype and use it as compute_dtype for both diffusers ModelMixin and plain nn.Module models. 
--- invokeai/backend/model_manager/load/load_default.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/invokeai/backend/model_manager/load/load_default.py b/invokeai/backend/model_manager/load/load_default.py index c20ed372695..b0a7b6f9034 100644 --- a/invokeai/backend/model_manager/load/load_default.py +++ b/invokeai/backend/model_manager/load/load_default.py @@ -168,6 +168,13 @@ def _apply_fp8_layerwise_casting( storage_dtype = torch.float8_e4m3fn compute_dtype = self._torch_dtype

+ # Detect the model's current dtype to use as compute dtype, since models + # (e.g. Flux) may require a specific dtype (bf16) that differs from the global torch dtype (fp16). + if isinstance(model, torch.nn.Module): + first_param = next(model.parameters(), None) + if first_param is not None: + compute_dtype = first_param.dtype + from diffusers.models.modeling_utils import ModelMixin

if isinstance(model, ModelMixin): @@ -176,12 +183,6 @@ def _apply_fp8_layerwise_casting( compute_dtype=compute_dtype, ) elif isinstance(model, torch.nn.Module): - # Detect the model's current dtype to use as compute dtype, since custom models - # (e.g. Flux checkpoint) may require a specific dtype (bf16) that differs from - # the global torch dtype (fp16). - first_param = next(model.parameters(), None) - if first_param is not None: - compute_dtype = first_param.dtype self._apply_fp8_to_nn_module(model, storage_dtype=storage_dtype, compute_dtype=compute_dtype) else: return model

From 025759fad3f073191ffd13a056bb42b08961b7fc Mon Sep 17 00:00:00 2001 From: Alexander Eichhorn Date: Sat, 21 Mar 2026 09:12:44 +0100 Subject: [PATCH 7/7] fix: remove redundant _apply_fp8_layerwise_casting call from Z-Image loader

The call was dead code: _should_use_fp8() already returns False for Z-Image models (excluded in PATCH 5), so the cast was never applied.

--- invokeai/backend/model_manager/load/model_loaders/z_image.py | 2 -- 1 file changed, 2 deletions(-)

diff --git a/invokeai/backend/model_manager/load/model_loaders/z_image.py b/invokeai/backend/model_manager/load/model_loaders/z_image.py index 626defabf69..6c2102933af 100644 --- a/invokeai/backend/model_manager/load/model_loaders/z_image.py +++ b/invokeai/backend/model_manager/load/model_loaders/z_image.py @@ -288,8 +288,6 @@ def _load_from_singlefile( sd[k] = sd[k].to(model_dtype) model.load_state_dict(sd, assign=True) - - model = self._apply_fp8_layerwise_casting(model, config, submodel_type) return model
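Note for reviewers: both mechanisms in this series can be exercised standalone. The two sketches below are illustrative, not part of the patches — the FLUX repo ID is an assumption (any diffusers ModelMixin behaves the same), exact savings vary by model, and a PyTorch build with float8_e4m3fn support (>= 2.1) is assumed.

Diffusers path, i.e. what the loaders call for ModelMixin models:

    import torch
    from diffusers import FluxTransformer2DModel

    # Illustrative checkpoint -- substitute any locally available diffusers model.
    model = FluxTransformer2DModel.from_pretrained(
        "black-forest-labs/FLUX.1-dev", subfolder="transformer", torch_dtype=torch.bfloat16
    )

    # Weights are stored as float8_e4m3fn and cast back to bf16 layer-by-layer
    # inside each module's forward pass.
    model.enable_layerwise_casting(
        storage_dtype=torch.float8_e4m3fn,
        compute_dtype=torch.bfloat16,
    )
    model.to("cuda")

    # Steady-state allocation should land at roughly half the plain-bf16 footprint.
    print(f"allocated: {torch.cuda.memory_allocated() / 1024**3:.2f} GiB")

Hook fallback for plain nn.Module models, i.e. the PATCH 4 mechanism reduced to a single layer:

    import torch

    lin = torch.nn.Linear(64, 64, dtype=torch.bfloat16)
    for p in lin.parameters():
        p.data = p.data.to(torch.float8_e4m3fn)  # FP8 at rest

    def pre_hook(mod, args):  # cast up to compute dtype just before forward
        for p in mod.parameters(recurse=False):
            p.data = p.data.to(torch.bfloat16)

    def post_hook(mod, args, output):  # cast back to storage dtype right after
        for p in mod.parameters(recurse=False):
            p.data = p.data.to(torch.float8_e4m3fn)

    lin.register_forward_pre_hook(pre_hook)
    lin.register_forward_hook(post_hook)

    y = lin(torch.randn(1, 64, dtype=torch.bfloat16))  # computes in bf16

In both cases the trade is one up-cast and one down-cast per layer per forward pass in exchange for roughly halved weight VRAM at rest.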