Skip to content

Commit 7022b29

Browse files
committed
Added back shape_changed
1 parent d6ed44e commit 7022b29

File tree

2 files changed

+9
-5
lines changed

2 files changed

+9
-5
lines changed

core/runtime/execute_engine.cpp

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -96,7 +96,8 @@ void setup_input_tensors(
9696
std::vector<at::Tensor> inputs,
9797
c10::intrusive_ptr<TRTEngine> compiled_engine,
9898
bool cudagraphs_enabled,
99-
bool need_cudagraphs_record) {
99+
bool need_cudagraphs_record,
100+
bool shape_changed) {
100101
// this is a buffer to store shape tensor input addresses throughout the runtime scope
101102
std::list<std::vector<int64_t>> inputShapeTensorValues;
102103
std::list<at::Tensor> formatted_inputs(compiled_engine->num_io.first);
@@ -145,7 +146,7 @@ void setup_input_tensors(
145146
// Create a new persistent input buffer
146147
compiled_engine->input_buffers[i] = std::move(formatted_inputs.back().clone());
147148
}
148-
if (need_cudagraphs_record) {
149+
if (shape_changed) {
149150
TORCHTRT_CHECK(
150151
compiled_engine->exec_ctx->setInputShape(name.c_str(), dims), "Error while setting the input shape");
151152
}
@@ -226,7 +227,7 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
226227
input_profiler_guard =
227228
std::make_unique<torch::autograd::profiler::RecordProfile>(compiled_engine->input_profile_path);
228229
}
229-
setup_input_tensors(inputs, compiled_engine, cudagraphs_enabled, need_cudagraphs_record);
230+
setup_input_tensors(inputs, compiled_engine, cudagraphs_enabled, need_cudagraphs_record, shape_changed);
230231
// Check if input shapes can be inferred.
231232
int32_t const io_size{compiled_engine->io_size};
232233
std::vector<char const*> names(io_size);
@@ -361,7 +362,7 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
361362
std::make_unique<torch::autograd::profiler::RecordProfile>(compiled_engine->input_profile_path);
362363
}
363364

364-
setup_input_tensors(inputs, compiled_engine, false, false);
365+
setup_input_tensors(inputs, compiled_engine, false, false, true);
365366
// Check if input shapes can be inferred.
366367
int32_t const io_size{compiled_engine->cuda_engine->getNbIOTensors()};
367368
std::vector<char const*> names(io_size);

py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -384,6 +384,7 @@ def setup_input_tensors(
384384
contiguous_inputs: List[torch.Tensor],
385385
cudagraphs_enabled: bool,
386386
need_cudagraphs_record: bool,
387+
shape_changed: bool = True,
387388
) -> None:
388389
for i, input_name in enumerate(self.input_names):
389390
if not contiguous_inputs[i].is_cuda:
@@ -417,7 +418,7 @@ def setup_input_tensors(
417418
inputs_cpu = contiguous_inputs[i].cpu().to(torch.int64).numpy().copy()
418419
self.context.set_tensor_address(input_name, inputs_cpu.ctypes.data)
419420
else:
420-
if need_cudagraphs_record:
421+
if shape_changed:
421422
self.context.set_input_shape(
422423
input_name, tuple(contiguous_inputs[i].shape)
423424
)
@@ -491,6 +492,8 @@ def run_standard_execution() -> torch.Tensor | Tuple[torch.Tensor, ...]:
491492
contiguous_inputs,
492493
self.cudagraphs_enabled,
493494
need_cudagraphs_record,
495+
shape_changed
496+
or self.output_tensors is None, # First time execution
494497
)
495498

496499
if shape_changed:

0 commit comments

Comments
 (0)