diff --git a/invokeai/backend/model_manager/configs/controlnet.py b/invokeai/backend/model_manager/configs/controlnet.py
index 6688ec95ffd..1c73df41209 100644
--- a/invokeai/backend/model_manager/configs/controlnet.py
+++ b/invokeai/backend/model_manager/configs/controlnet.py
@@ -54,6 +54,10 @@ class ControlAdapterDefaultSettings(BaseModel):
     # This could be narrowed to controlnet processor nodes, but they change. Leaving this a string is safer.
     preprocessor: str | None
+    fp8_storage: bool | None = Field(
+        default=None,
+        description="Store weights in FP8 to reduce VRAM usage (~50% savings). Weights are cast to compute dtype during inference.",
+    )

     model_config = ConfigDict(extra="forbid")

     @classmethod
diff --git a/invokeai/backend/model_manager/configs/main.py b/invokeai/backend/model_manager/configs/main.py
index 6f737ceb92d..b8f107de127 100644
--- a/invokeai/backend/model_manager/configs/main.py
+++ b/invokeai/backend/model_manager/configs/main.py
@@ -51,6 +51,10 @@ class MainModelDefaultSettings(BaseModel):
     height: int | None = Field(default=None, multiple_of=8, ge=64, description="Default height for this model")
     guidance: float | None = Field(default=None, ge=1, description="Default Guidance for this model")
     cpu_only: bool | None = Field(default=None, description="Whether this model should run on CPU only")
+    fp8_storage: bool | None = Field(
+        default=None,
+        description="Store weights in FP8 to reduce VRAM usage (~50% savings). Weights are cast to compute dtype during inference.",
+    )

     model_config = ConfigDict(extra="forbid")
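The two config fields above are deliberately tri-state: `None` means "no default configured" (the UI toggle is unset), and only an explicit `True` opts a model into FP8 storage, which is what `_should_use_fp8` checks for below. A minimal standalone sketch of those semantics (the `DefaultSettings` class here is illustrative, not the actual InvokeAI config model):

```python
from pydantic import BaseModel, ConfigDict, Field


class DefaultSettings(BaseModel):
    # Tri-state: None = unset, True = opt in to FP8, False = explicit opt-out.
    fp8_storage: bool | None = Field(default=None)

    model_config = ConfigDict(extra="forbid")


assert DefaultSettings().fp8_storage is None  # unset: loader leaves dtype alone
assert DefaultSettings(fp8_storage=True).fp8_storage is True  # only this enables FP8 casting
assert DefaultSettings(fp8_storage=False).fp8_storage is False  # explicit opt-out
```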
diff --git a/invokeai/backend/model_manager/load/load_default.py b/invokeai/backend/model_manager/load/load_default.py
index ea699207348..b0a7b6f9034 100644
--- a/invokeai/backend/model_manager/load/load_default.py
+++ b/invokeai/backend/model_manager/load/load_default.py
@@ -124,6 +124,108 @@ def get_size_fs(
             variant=config.repo_variant if isinstance(config, Diffusers_Config_Base) else None,
         )

+    def _should_use_fp8(self, config: AnyModelConfig, submodel_type: Optional[SubModelType] = None) -> bool:
+        """Check if FP8 layerwise casting should be applied to a model."""
+        # FP8 storage only works on CUDA.
+        if self._torch_device.type != "cuda":
+            return False
+
+        # Z-Image has dtype mismatch issues with diffusers' layerwise casting
+        # (skipped modules produce bf16, hooked modules expect fp16).
+        from invokeai.backend.model_manager.taxonomy import BaseModelType
+
+        if hasattr(config, "base") and config.base == BaseModelType.ZImage:
+            return False
+
+        # Don't apply FP8 to text encoders, tokenizers, schedulers, etc.
+        _excluded_submodel_types = {
+            SubModelType.TextEncoder,
+            SubModelType.TextEncoder2,
+            SubModelType.TextEncoder3,
+            SubModelType.Tokenizer,
+            SubModelType.Tokenizer2,
+            SubModelType.Tokenizer3,
+            SubModelType.Scheduler,
+            SubModelType.SafetyChecker,
+        }
+        if submodel_type in _excluded_submodel_types:
+            return False
+
+        # Check default_settings.fp8_storage (main models, ControlNets).
+        if hasattr(config, "default_settings") and config.default_settings is not None:
+            if hasattr(config.default_settings, "fp8_storage") and config.default_settings.fp8_storage is True:
+                return True
+
+        return False
+
+    def _apply_fp8_layerwise_casting(
+        self, model: AnyModel, config: AnyModelConfig, submodel_type: Optional[SubModelType] = None
+    ) -> AnyModel:
+        """Apply FP8 layerwise casting to a model if enabled in its config."""
+        if not self._should_use_fp8(config, submodel_type):
+            return model
+
+        storage_dtype = torch.float8_e4m3fn
+        compute_dtype = self._torch_dtype
+
+        # Detect the model's current dtype to use as compute dtype, since models
+        # (e.g. Flux) may require a specific dtype (bf16) that differs from the global torch dtype (fp16).
+        if isinstance(model, torch.nn.Module):
+            first_param = next(model.parameters(), None)
+            if first_param is not None:
+                compute_dtype = first_param.dtype
+
+        from diffusers.models.modeling_utils import ModelMixin
+
+        if isinstance(model, ModelMixin):
+            model.enable_layerwise_casting(
+                storage_dtype=storage_dtype,
+                compute_dtype=compute_dtype,
+            )
+        elif isinstance(model, torch.nn.Module):
+            self._apply_fp8_to_nn_module(model, storage_dtype=storage_dtype, compute_dtype=compute_dtype)
+        else:
+            return model
+
+        param_bytes = sum(p.nelement() * p.element_size() for p in model.parameters())
+        self._logger.info(
+            f"FP8 layerwise casting enabled for {config.name} "
+            f"(storage=float8_e4m3fn, compute={compute_dtype}, "
+            f"param_size={param_bytes / (1024**2):.0f}MB)"
+        )
+        return model
+
+    @staticmethod
+    def _apply_fp8_to_nn_module(model: torch.nn.Module, storage_dtype: torch.dtype, compute_dtype: torch.dtype) -> None:
+        """Apply FP8 layerwise casting to a plain nn.Module using forward hooks."""
+        for module in model.modules():
+            params = list(module.parameters(recurse=False))
+            if not params:
+                continue
+
+            # Convert this module's own parameters to the FP8 storage dtype.
+            for param in params:
+                param.data = param.data.to(storage_dtype)
+
+            # Pre-hook: cast to the compute dtype before forward.
+            def _make_pre_hook(dt: torch.dtype):
+                def hook(mod: torch.nn.Module, _args: object) -> None:
+                    for p in mod.parameters(recurse=False):
+                        p.data = p.data.to(dt)
+
+                return hook
+
+            # Post-hook: cast back to the storage dtype after forward.
+            def _make_post_hook(dt: torch.dtype):
+                def hook(mod: torch.nn.Module, _args: object, _output: object) -> None:
+                    for p in mod.parameters(recurse=False):
+                        p.data = p.data.to(dt)
+
+                return hook
+
+            module.register_forward_pre_hook(_make_pre_hook(compute_dtype))
+            module.register_forward_hook(_make_post_hook(storage_dtype))
+
     # This needs to be implemented in the subclass
     def _load_model(
         self,
diff --git a/invokeai/backend/model_manager/load/model_loaders/cogview4.py b/invokeai/backend/model_manager/load/model_loaders/cogview4.py
index ee8c6d4f41d..6e8490912bc 100644
--- a/invokeai/backend/model_manager/load/model_loaders/cogview4.py
+++ b/invokeai/backend/model_manager/load/model_loaders/cogview4.py
@@ -55,4 +55,5 @@ def _load_model(
         else:
             raise e

+        result = self._apply_fp8_layerwise_casting(result, config, submodel_type)
         return result
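The hook-based path in `_apply_fp8_to_nn_module` is the core trick: weights rest in FP8 (one byte per parameter) and are round-tripped to the compute dtype only for the duration of each module's forward call. A self-contained sketch of the same mechanism on a single layer (layer size and dtypes are illustrative; requires PyTorch >= 2.1 for `torch.float8_e4m3fn`):

```python
import torch

linear = torch.nn.Linear(64, 64).to(torch.bfloat16)

storage_dtype = torch.float8_e4m3fn
compute_dtype = torch.bfloat16

# Park the weights in FP8: 1 byte per parameter instead of 2.
for p in linear.parameters():
    p.data = p.data.to(storage_dtype)


def pre_hook(mod: torch.nn.Module, _args: object) -> None:
    # Upcast just-in-time, right before the layer computes.
    for p in mod.parameters(recurse=False):
        p.data = p.data.to(compute_dtype)


def post_hook(mod: torch.nn.Module, _args: object, _output: object) -> None:
    # Return the weights to FP8 at rest.
    for p in mod.parameters(recurse=False):
        p.data = p.data.to(storage_dtype)


linear.register_forward_pre_hook(pre_hook)
linear.register_forward_hook(post_hook)

x = torch.randn(1, 64, dtype=compute_dtype)
y = linear(x)  # computes in bf16; weights are FP8 before and after the call
assert linear.weight.dtype == storage_dtype
```

The trade-off is an extra pair of casts per module call; diffusers models skip this manual path because `ModelMixin.enable_layerwise_casting` installs equivalent hooks itself.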
diff --git a/invokeai/backend/model_manager/load/model_loaders/controlnet.py b/invokeai/backend/model_manager/load/model_loaders/controlnet.py
index 8fd1796b8f5..e50e45849ab 100644
--- a/invokeai/backend/model_manager/load/model_loaders/controlnet.py
+++ b/invokeai/backend/model_manager/load/model_loaders/controlnet.py
@@ -45,9 +45,11 @@ def _load_model(
         submodel_type: Optional[SubModelType] = None,
     ) -> AnyModel:
         if isinstance(config, ControlNet_Checkpoint_Config_Base):
-            return ControlNetModel.from_single_file(
+            result = ControlNetModel.from_single_file(
                 config.path,
                 torch_dtype=self._torch_dtype,
             )
+            result = self._apply_fp8_layerwise_casting(result, config, submodel_type)
+            return result
         else:
             return super()._load_model(config, submodel_type)
diff --git a/invokeai/backend/model_manager/load/model_loaders/flux.py b/invokeai/backend/model_manager/load/model_loaders/flux.py
index 2de51a8acae..33ab7bc0ce9 100644
--- a/invokeai/backend/model_manager/load/model_loaders/flux.py
+++ b/invokeai/backend/model_manager/load/model_loaders/flux.py
@@ -139,6 +139,7 @@ def _load_model(
                 local_files_only=True,
             )

+        model = self._apply_fp8_layerwise_casting(model, config, submodel_type)
         return model

@@ -201,6 +202,7 @@ def _load_model(
             vae_dtype = self._torch_dtype
         model.to(vae_dtype)

+        model = self._apply_fp8_layerwise_casting(model, config, submodel_type)
         return model

     def _convert_flux2_vae_bfl_to_diffusers(self, sd: dict) -> dict:
@@ -485,7 +487,9 @@ def _load_model(
         match submodel_type:
             case SubModelType.Transformer:
-                return self._load_from_singlefile(config)
+                model = self._load_from_singlefile(config)
+                model = self._apply_fp8_layerwise_casting(model, config, submodel_type)
+                return model

         raise ValueError(
             f"Only Transformer submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}"
         )
@@ -639,6 +643,7 @@ def _load_model(
         else:
             raise e

+        result = self._apply_fp8_layerwise_casting(result, config, submodel_type)
         return result

@@ -715,6 +720,7 @@ def _load_model(
         if guidance_emb.linear_2.bias is not None:
             guidance_emb.linear_2.bias.data.zero_()

+        result = self._apply_fp8_layerwise_casting(result, config, submodel_type)
         return result

@@ -732,7 +738,9 @@ def _load_model(
         match submodel_type:
             case SubModelType.Transformer:
-                return self._load_from_singlefile(config)
+                model = self._load_from_singlefile(config)
+                model = self._apply_fp8_layerwise_casting(model, config, submodel_type)
+                return model

         raise ValueError(
             f"Only Transformer submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}"
         )
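The "~50% savings" figure quoted in the field descriptions follows directly from byte widths: FP8 stores one byte per parameter versus two for FP16/BF16 (which is also why the loaders above detect the model's own dtype, e.g. bf16 for FLUX, rather than trusting the global torch dtype). A quick back-of-the-envelope check (the parameter count is illustrative, roughly FLUX-dev-sized; activations and caches are not counted):

```python
params = 12e9  # ~12B-parameter transformer (illustrative)

fp16_gib = params * 2 / 1024**3  # 2 bytes per parameter
fp8_gib = params * 1 / 1024**3   # 1 byte per parameter

print(f"fp16/bf16 weights: {fp16_gib:.1f} GiB")  # ~22.4 GiB
print(f"fp8 weights:       {fp8_gib:.1f} GiB")   # ~11.2 GiB
print(f"savings:           {1 - fp8_gib / fp16_gib:.0%}")  # 50%
```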
diff --git a/invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py b/invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py
index 2a79f604ba2..7e87869c9e3 100644
--- a/invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py
+++ b/invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py
@@ -47,6 +47,7 @@ def _load_model(
                 result = model_class.from_pretrained(model_path, torch_dtype=self._torch_dtype, local_files_only=True)
             else:
                 raise e
+        result = self._apply_fp8_layerwise_casting(result, config, submodel_type)
         return result

     # TO DO: Add exception handling
diff --git a/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py b/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py
index 0e11cd4191d..d19d6477626 100644
--- a/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py
+++ b/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py
@@ -90,6 +90,7 @@ def _load_model(
             else:
                 raise e

+        result = self._apply_fp8_layerwise_casting(result, config, submodel_type)
         return result

     def _load_from_singlefile(
@@ -152,5 +153,8 @@ def _load_from_singlefile(
             if subtype == submodel_type:
                 continue
             if submodel := getattr(pipeline, subtype.value, None):
+                self._apply_fp8_layerwise_casting(submodel, config, subtype)
                 self._ram_cache.put(get_model_cache_key(config.key, subtype), model=submodel)
-        return getattr(pipeline, submodel_type.value)
+        result = getattr(pipeline, submodel_type.value)
+        result = self._apply_fp8_layerwise_casting(result, config, submodel_type)
+        return result
diff --git a/invokeai/backend/model_manager/load/model_loaders/vae.py b/invokeai/backend/model_manager/load/model_loaders/vae.py
index e91903ccdad..077e688a813 100644
--- a/invokeai/backend/model_manager/load/model_loaders/vae.py
+++ b/invokeai/backend/model_manager/load/model_loaders/vae.py
@@ -29,9 +29,11 @@ def _load_model(
         submodel_type: Optional[SubModelType] = None,
     ) -> AnyModel:
         if isinstance(config, VAE_Checkpoint_Config_Base):
-            return AutoencoderKL.from_single_file(
+            result = AutoencoderKL.from_single_file(
                 config.path,
                 torch_dtype=self._torch_dtype,
             )
+            result = self._apply_fp8_layerwise_casting(result, config, submodel_type)
+            return result
         else:
             return super()._load_model(config, submodel_type)
diff --git a/invokeai/backend/model_manager/load/model_loaders/z_image.py b/invokeai/backend/model_manager/load/model_loaders/z_image.py
index c381e02718d..6c2102933af 100644
--- a/invokeai/backend/model_manager/load/model_loaders/z_image.py
+++ b/invokeai/backend/model_manager/load/model_loaders/z_image.py
@@ -163,6 +163,7 @@ def _load_model(
         else:
             raise e

+        result = self._apply_fp8_layerwise_casting(result, config, submodel_type)
         return result
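For diffusers models, all of the loaders above funnel into the `ModelMixin.enable_layerwise_casting` branch of `_apply_fp8_layerwise_casting`. In isolation, that call looks roughly like the following sketch (the checkpoint path is hypothetical, and a diffusers release that ships `enable_layerwise_casting` plus a CUDA device are assumed):

```python
import torch
from diffusers import AutoencoderKL

vae = AutoencoderKL.from_single_file("/path/to/vae.safetensors", torch_dtype=torch.float16)
vae.enable_layerwise_casting(
    storage_dtype=torch.float8_e4m3fn,  # weights at rest
    compute_dtype=torch.float16,        # dtype used inside each layer's forward
)
vae.to("cuda")  # FP8 weights occupy roughly half the VRAM of FP16
```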
diff --git a/invokeai/frontend/web/public/locales/en.json b/invokeai/frontend/web/public/locales/en.json
index 58be5430a26..a1f0ccbf257 100644
--- a/invokeai/frontend/web/public/locales/en.json
+++ b/invokeai/frontend/web/public/locales/en.json
@@ -1031,6 +1031,7 @@
     "convertToDiffusersHelpText5": "Please make sure you have enough disk space. Models generally vary between 2GB-7GB in size.",
     "convertToDiffusersHelpText6": "Do you wish to convert this model?",
     "cpuOnly": "CPU Only",
+    "fp8Storage": "FP8 Storage (Save VRAM)",
     "runOnCpu": "Run text encoder model on CPU only",
     "noDefaultSettings": "No default settings configured for this model. Visit the Model Manager to add default settings.",
     "defaultSettings": "Default Settings",
@@ -2198,6 +2199,13 @@
        "When enabled, only the text encoder component will run on CPU instead of GPU.",
        "This saves VRAM for the denoiser while only slightly impacting performance. The conditioning outputs are automatically moved to GPU for the denoiser."
      ]
+    },
+    "fp8Storage": {
+      "heading": "FP8 Storage",
+      "paragraphs": [
+        "Stores model weights in FP8 format in VRAM, reducing memory usage by approximately 50% compared to FP16.",
+        "During inference, weights are cast layer-by-layer to the compute precision (FP16/BF16), so image quality is essentially preserved. Works on all CUDA GPUs."
+      ]
     }
   },
   "workflows": {
diff --git a/invokeai/frontend/web/src/common/components/InformationalPopover/constants.ts b/invokeai/frontend/web/src/common/components/InformationalPopover/constants.ts
index 95fa75cfa32..e9d855648ad 100644
--- a/invokeai/frontend/web/src/common/components/InformationalPopover/constants.ts
+++ b/invokeai/frontend/web/src/common/components/InformationalPopover/constants.ts
@@ -77,7 +77,8 @@ export type Feature =
   | 'tileOverlap'
   | 'optimizedDenoising'
   | 'fluxDevLicense'
-  | 'cpuOnly';
+  | 'cpuOnly'
+  | 'fp8Storage';

 export type PopoverData = PopoverProps & {
   image?: string;
diff --git a/invokeai/frontend/web/src/features/modelManagerV2/hooks/useControlAdapterModelDefaultSettings.ts b/invokeai/frontend/web/src/features/modelManagerV2/hooks/useControlAdapterModelDefaultSettings.ts
index 1f14c08dedc..9ab49ca889f 100644
--- a/invokeai/frontend/web/src/features/modelManagerV2/hooks/useControlAdapterModelDefaultSettings.ts
+++ b/invokeai/frontend/web/src/features/modelManagerV2/hooks/useControlAdapterModelDefaultSettings.ts
@@ -11,6 +11,10 @@ export const useControlAdapterModelDefaultSettings = (
         isEnabled: !isNil(modelConfig?.default_settings?.preprocessor),
         value: modelConfig?.default_settings?.preprocessor || 'none',
       },
+      fp8Storage: {
+        isEnabled: !isNil(modelConfig?.default_settings?.fp8_storage),
+        value: modelConfig?.default_settings?.fp8_storage ?? false,
+      },
     };
   }, [modelConfig?.default_settings]);
diff --git a/invokeai/frontend/web/src/features/modelManagerV2/hooks/useMainModelDefaultSettings.ts b/invokeai/frontend/web/src/features/modelManagerV2/hooks/useMainModelDefaultSettings.ts
index dfab2d251f9..10b66b7a25e 100644
--- a/invokeai/frontend/web/src/features/modelManagerV2/hooks/useMainModelDefaultSettings.ts
+++ b/invokeai/frontend/web/src/features/modelManagerV2/hooks/useMainModelDefaultSettings.ts
@@ -41,6 +41,10 @@ export const useMainModelDefaultSettings = (modelConfig: MainModelConfig) => {
         isEnabled: !isNil(modelConfig?.default_settings?.guidance),
         value: modelConfig?.default_settings?.guidance ?? 4,
       },
+      fp8Storage: {
+        isEnabled: !isNil(modelConfig?.default_settings?.fp8_storage),
+        value: modelConfig?.default_settings?.fp8_storage ?? false,
+      },
     };
   }, [modelConfig]);
diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ControlAdapterModelDefaultSettings/ControlAdapterModelDefaultSettings.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ControlAdapterModelDefaultSettings/ControlAdapterModelDefaultSettings.tsx
index a5e8f10a4bc..aad70846ffd 100644
--- a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ControlAdapterModelDefaultSettings/ControlAdapterModelDefaultSettings.tsx
+++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ControlAdapterModelDefaultSettings/ControlAdapterModelDefaultSettings.tsx
@@ -1,6 +1,7 @@
 import { Button, Flex, Heading, SimpleGrid } from '@invoke-ai/ui-library';
 import { useControlAdapterModelDefaultSettings } from 'features/modelManagerV2/hooks/useControlAdapterModelDefaultSettings';
 import { useIsModelManagerEnabled } from 'features/modelManagerV2/hooks/useIsModelManagerEnabled';
+import { DefaultFp8StorageControlAdapter } from 'features/modelManagerV2/subpanels/ModelPanel/ControlAdapterModelDefaultSettings/DefaultFp8StorageControlAdapter';
 import { DefaultPreprocessor } from 'features/modelManagerV2/subpanels/ModelPanel/ControlAdapterModelDefaultSettings/DefaultPreprocessor';
 import type { FormField } from 'features/modelManagerV2/subpanels/ModelPanel/MainModelDefaultSettings/MainModelDefaultSettings';
 import { toast } from 'features/toast/toast';
@@ -14,6 +15,7 @@ import type { ControlLoRAModelConfig, ControlNetModelConfig, T2IAdapterModelConf

 export type ControlAdapterModelDefaultSettingsFormData = {
   preprocessor: FormField<string>;
+  fp8Storage: FormField<boolean>;
 };
@@ -40,6 +42,7 @@ export const ControlAdapterModelDefaultSettings = memo(({ modelConfig }: Props)
     (data) => {
       const body = {
         preprocessor: data.preprocessor.isEnabled ? data.preprocessor.value : null,
+        fp8_storage: data.fp8Storage.isEnabled ? data.fp8Storage.value : null,
       };

       updateModel({
@@ -88,6 +91,7 @@ export const ControlAdapterModelDefaultSettings = memo(({ modelConfig }: Props)
       <SimpleGrid columns={2} gap={8}>
         <DefaultPreprocessor control={control} name="preprocessor" />
+        <DefaultFp8StorageControlAdapter control={control} name="fp8Storage" />
       </SimpleGrid>
     </Flex>
   );
diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ControlAdapterModelDefaultSettings/DefaultFp8StorageControlAdapter.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ControlAdapterModelDefaultSettings/DefaultFp8StorageControlAdapter.tsx
new file mode 100644
index 00000000000..812d081c356
--- /dev/null
+++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ControlAdapterModelDefaultSettings/DefaultFp8StorageControlAdapter.tsx
@@ -0,0 +1,45 @@
+import { FormControl, FormLabel, Switch } from '@invoke-ai/ui-library';
+import { InformationalPopover } from 'common/components/InformationalPopover/InformationalPopover';
+import type { ChangeEvent } from 'react';
+import { memo, useCallback, useMemo } from 'react';
+import type { UseControllerProps } from 'react-hook-form';
+import { useController } from 'react-hook-form';
+import { useTranslation } from 'react-i18next';
+
+import type { ControlAdapterModelDefaultSettingsFormData } from './ControlAdapterModelDefaultSettings';
+
+type DefaultFp8StorageType = ControlAdapterModelDefaultSettingsFormData['fp8Storage'];
+
+export const DefaultFp8StorageControlAdapter = memo(
+  (props: UseControllerProps<ControlAdapterModelDefaultSettingsFormData>) => {
+    const { t } = useTranslation();
+    const { field } = useController(props);
+
+    const onChange = useCallback(
+      (e: ChangeEvent<HTMLInputElement>) => {
+        const updatedValue = {
+          ...(field.value as DefaultFp8StorageType),
+          value: e.target.checked,
+          isEnabled: e.target.checked,
+        };
+        field.onChange(updatedValue);
+      },
+      [field]
+    );
+
+    const value = useMemo(() => {
+      return (field.value as DefaultFp8StorageType).value;
+    }, [field.value]);
+
+    return (
+      <FormControl flexDir="column" gap={2} alignItems="flex-start">
+        <InformationalPopover feature="fp8Storage">
+          <FormLabel>{t('modelManager.fp8Storage')}</FormLabel>
+        </InformationalPopover>
+        <Switch isChecked={value} onChange={onChange} />
+      </FormControl>
+    );
+  }
+);
+
+DefaultFp8StorageControlAdapter.displayName = 'DefaultFp8StorageControlAdapter';
diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/MainModelDefaultSettings/DefaultFp8Storage.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/MainModelDefaultSettings/DefaultFp8Storage.tsx
new file mode 100644
index 00000000000..d860846766e
--- /dev/null
+++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/MainModelDefaultSettings/DefaultFp8Storage.tsx
@@ -0,0 +1,43 @@
+import { FormControl, FormLabel, Switch } from '@invoke-ai/ui-library';
+import { InformationalPopover } from 'common/components/InformationalPopover/InformationalPopover';
+import type { ChangeEvent } from 'react';
+import { memo, useCallback, useMemo } from 'react';
+import type { UseControllerProps } from 'react-hook-form';
+import { useController } from 'react-hook-form';
+import { useTranslation } from 'react-i18next';
+
+import type { MainModelDefaultSettingsFormData } from './MainModelDefaultSettings';
+
+type DefaultFp8StorageType = MainModelDefaultSettingsFormData['fp8Storage'];
+
+export const DefaultFp8Storage = memo((props: UseControllerProps<MainModelDefaultSettingsFormData>) => {
+  const { t } = useTranslation();
+  const { field } = useController(props);
+
+  const onChange = useCallback(
+    (e: ChangeEvent<HTMLInputElement>) => {
+      const updatedValue = {
+        ...(field.value as DefaultFp8StorageType),
+        value: e.target.checked,
+        isEnabled: e.target.checked,
+      };
+      field.onChange(updatedValue);
+    },
+    [field]
+  );
+
+  const value = useMemo(() => {
+    return (field.value as DefaultFp8StorageType).value;
+  }, [field.value]);
+
+  return (
+    <FormControl flexDir="column" gap={2} alignItems="flex-start">
+      <InformationalPopover feature="fp8Storage">
+        <FormLabel>{t('modelManager.fp8Storage')}</FormLabel>
+      </InformationalPopover>
+      <Switch isChecked={value} onChange={onChange} />
+    </FormControl>
+  );
+});
+
+DefaultFp8Storage.displayName = 'DefaultFp8Storage';
diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/MainModelDefaultSettings/MainModelDefaultSettings.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/MainModelDefaultSettings/MainModelDefaultSettings.tsx
index dd944897b24..b69bf117bc6 100644
--- a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/MainModelDefaultSettings/MainModelDefaultSettings.tsx
+++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/MainModelDefaultSettings/MainModelDefaultSettings.tsx
@@ -18,6 +18,7 @@ import type { MainModelConfig } from 'services/api/types';

 import { DefaultCfgRescaleMultiplier } from './DefaultCfgRescaleMultiplier';
 import { DefaultCfgScale } from './DefaultCfgScale';
+import { DefaultFp8Storage } from './DefaultFp8Storage';
 import { DefaultGuidance } from './DefaultGuidance';
 import { DefaultScheduler } from './DefaultScheduler';
 import { DefaultSteps } from './DefaultSteps';
@@ -39,6 +40,7 @@ export type MainModelDefaultSettingsFormData = {
   width: FormField<number>;
   height: FormField<number>;
   guidance: FormField<number>;
+  fp8Storage: FormField<boolean>;
 };

 type Props = {
@@ -54,6 +56,10 @@ export const MainModelDefaultSettings = memo(({ modelConfig }: Props) => {
     return ['flux', 'flux2'].includes(modelConfig.base);
   }, [modelConfig]);

+  const isZImage = useMemo(() => {
+    return modelConfig.base === 'z-image';
+  }, [modelConfig]);
+
   const defaultSettingsDefaults = useMainModelDefaultSettings(modelConfig);

   const optimalDimension = useMemo(() => {
     const modelBase = modelConfig?.base;
@@ -85,6 +91,7 @@ export const MainModelDefaultSettings = memo(({ modelConfig }: Props) => {
         width: data.width.isEnabled ? data.width.value : null,
         height: data.height.isEnabled ? data.height.value : null,
         guidance: data.guidance.isEnabled ? data.guidance.value : null,
+        fp8_storage: data.fp8Storage.isEnabled ? data.fp8Storage.value : null,
       };

       updateModel({
@@ -141,6 +148,7 @@ export const MainModelDefaultSettings = memo(({ modelConfig }: Props) => {
         {!isFluxFamily && <DefaultCfgRescaleMultiplier control={control} name="cfgRescaleMultiplier" />}
+        {!isZImage && <DefaultFp8Storage control={control} name="fp8Storage" />}
       </SimpleGrid>
     </Flex>
   );
diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts
index fc6506ce22b..ef4118492b5 100644
--- a/invokeai/frontend/web/src/services/api/schema.ts
+++ b/invokeai/frontend/web/src/services/api/schema.ts
@@ -5437,6 +5437,11 @@ export type components = {
     ControlAdapterDefaultSettings: {
       /** Preprocessor */
       preprocessor: string | null;
+      /**
+       * Fp8 Storage
+       * @description Store weights in FP8 to reduce VRAM usage (~50% savings). Weights are cast to compute dtype during inference.
+       */
+      fp8_storage?: boolean | null;
     };
     /** ControlField */
     ControlField: {
@@ -17398,6 +17403,11 @@ export type components = {
       /**
       * @description Whether this model should run on CPU only
       */
      cpu_only?: boolean | null;
+      /**
+       * Fp8 Storage
+       * @description Store weights in FP8 to reduce VRAM usage (~50% savings). Weights are cast to compute dtype during inference.
+       */
+      fp8_storage?: boolean | null;
    };
    /**
     * Main Model - SD1.5, SD2