Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions comfy/model_management.py
Original file line number Diff line number Diff line change
Expand Up @@ -663,6 +663,7 @@ def minimum_inference_memory():

def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, pins_required=0, ram_required=0):
cleanup_models_gc()
comfy.memory_management.extra_ram_release(max(pins_required, ram_required))
unloaded_model = []
can_unload = []
unloaded_models = []
Expand Down
6 changes: 0 additions & 6 deletions comfy/pinned_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import comfy.memory_management
import comfy_aimdo.host_buffer
import comfy_aimdo.torch
import psutil

from comfy.cli_args import args

Expand All @@ -12,11 +11,6 @@ def get_pin(module):
def pin_memory(module):
if module.pin_failed or args.disable_pinned_memory or get_pin(module) is not None:
return
#FIXME: This is a RAM cache trigger event
ram_headroom = comfy.memory_management.RAM_CACHE_HEADROOM
#we split the difference and assume half the RAM cache headroom is for us
if ram_headroom > 0 and psutil.virtual_memory().available < (ram_headroom * 0.5):
comfy.memory_management.extra_ram_release(ram_headroom)

size = comfy.memory_management.vram_aligned_size([ module.weight, module.bias ])

Expand Down
8 changes: 7 additions & 1 deletion comfy_execution/caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import time
import torch
from typing import Sequence, Mapping, Dict
from comfy.model_patcher import ModelPatcher
from comfy_execution.graph import DynamicPrompt
from abc import ABC, abstractmethod

Expand Down Expand Up @@ -523,13 +524,15 @@ def set_local(self, node_id, value):
self.timestamps[self.cache_key_set.get_data_key(node_id)] = time.time()
super().set_local(node_id, value)

def ram_release(self, target):
def ram_release(self, target, free_active=False):
if psutil.virtual_memory().available >= target:
return

clean_list = []

for key, cache_entry in self.cache.items():
if not free_active and self.used_generation[key] == self.generation:
continue
oom_score = RAM_CACHE_OLD_WORKFLOW_OOM_MULTIPLIER ** (self.generation - self.used_generation[key])

ram_usage = RAM_CACHE_DEFAULT_RAM_USAGE
Expand All @@ -542,6 +545,9 @@ def scan_list_for_ram_usage(outputs):
scan_list_for_ram_usage(output)
elif isinstance(output, torch.Tensor) and output.device.type == 'cpu':
ram_usage += output.numel() * output.element_size()
elif isinstance(output, ModelPatcher) and self.used_generation[key] != self.generation:
#old ModelPatchers are the first to go
ram_usage = 1e30
scan_list_for_ram_usage(cache_entry.outputs)

oom_score *= ram_usage
Expand Down
2 changes: 1 addition & 1 deletion execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -779,7 +779,7 @@ async def execute_async(self, prompt, prompt_id, extra_data={}, execute_outputs=

if self.cache_type == CacheType.RAM_PRESSURE:
comfy.model_management.free_memory(0, None, pins_required=ram_headroom, ram_required=ram_headroom)
comfy.memory_management.extra_ram_release(ram_headroom)
ram_release_callback(ram_headroom, free_active=True)
else:
# Only execute when the while-loop ends without break
# Send cached UI for intermediate output nodes that weren't executed
Expand Down
Loading