From 1fd84496940a1157de057578ab0467e2db6c157d Mon Sep 17 00:00:00 2001 From: Suguna Velury <178320438+sugunav14@users.noreply.github.com> Date: Fri, 6 Feb 2026 19:31:48 +0000 Subject: [PATCH 1/2] update to use dtype/ torch_dtype Signed-off-by: Suguna Velury <178320438+sugunav14@users.noreply.github.com> --- examples/gpt-oss/sft.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/gpt-oss/sft.py b/examples/gpt-oss/sft.py index d85f5a072..b51f1f9a6 100644 --- a/examples/gpt-oss/sft.py +++ b/examples/gpt-oss/sft.py @@ -72,7 +72,8 @@ def main(script_args, training_args, model_args, quant_args): "revision": model_args.model_revision, "trust_remote_code": model_args.trust_remote_code, "attn_implementation": model_args.attn_implementation, - "torch_dtype": model_args.torch_dtype, + "torch_dtype": getattr(model_args, "torch_dtype", None) + or getattr(model_args, "dtype", None), "use_cache": not training_args.gradient_checkpointing, } From 98e642d5f308b36ba42edc30298ddf79b8396adf Mon Sep 17 00:00:00 2001 From: Suguna Velury <178320438+sugunav14@users.noreply.github.com> Date: Fri, 6 Feb 2026 20:28:21 +0000 Subject: [PATCH 2/2] update Signed-off-by: Suguna Velury <178320438+sugunav14@users.noreply.github.com> --- examples/gpt-oss/sft.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/gpt-oss/sft.py b/examples/gpt-oss/sft.py index b51f1f9a6..9c2d6aeb8 100644 --- a/examples/gpt-oss/sft.py +++ b/examples/gpt-oss/sft.py @@ -72,8 +72,7 @@ def main(script_args, training_args, model_args, quant_args): "revision": model_args.model_revision, "trust_remote_code": model_args.trust_remote_code, "attn_implementation": model_args.attn_implementation, - "torch_dtype": getattr(model_args, "torch_dtype", None) - or getattr(model_args, "dtype", None), + "torch_dtype": getattr(model_args, "dtype", "float32"), "use_cache": not training_args.gradient_checkpointing, }