From 530e5a489cfa298209efa80e239b0f7595e8c8ea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dean=20Qui=C3=B1anola?= <dean.quinanola@runpod.io>
Date: Sat, 28 Feb 2026 10:17:29 -0800
Subject: [PATCH 1/6] fix: resolve .env from CWD instead of package source
 location

dotenv_values() and load_dotenv() without arguments use find_dotenv()
which walks up from the calling file's directory. For editable installs,
this resolves the library's dev .env (PYTHONPATH=src, FLASH_HOST=localhost)
instead of the user's project .env (HF_TOKEN, RUNPOD_API_KEY, etc).

Pass find_dotenv(usecwd=True) so the search starts from CWD (the user's
project directory) in both __init__.py and environment.py.
---
 src/runpod_flash/__init__.py                   |  6 ++++--
 src/runpod_flash/core/resources/environment.py |  6 ++++--
 tests/unit/test_dotenv_loading.py              | 10 +++++-----
 3 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/runpod_flash/__init__.py b/src/runpod_flash/__init__.py
index 723d385c..4a6e5b3c 100644
--- a/src/runpod_flash/__init__.py
+++ b/src/runpod_flash/__init__.py
@@ -1,9 +1,11 @@
 __version__ = "1.9.0"  # x-release-please-version
 
 # Load .env vars from file before everything else
-from dotenv import load_dotenv
+# usecwd=True walks up from CWD (user's project) instead of from the
+# package source file location, which matters for editable installs.
+from dotenv import find_dotenv, load_dotenv
 
-load_dotenv()
+load_dotenv(find_dotenv(usecwd=True))
 
 from .logger import setup_logging  # noqa: E402
 
diff --git a/src/runpod_flash/core/resources/environment.py b/src/runpod_flash/core/resources/environment.py
index e0088972..4897e75d 100644
--- a/src/runpod_flash/core/resources/environment.py
+++ b/src/runpod_flash/core/resources/environment.py
@@ -1,5 +1,5 @@
 from typing import Dict, Optional
-from dotenv import dotenv_values
+from dotenv import dotenv_values, find_dotenv
 
 
 class EnvironmentVars:
@@ -16,7 +16,9 @@ def _load_env(self) -> Dict[str, str]:
             Dict[str, str]: Dictionary containing environment variables from .env file
         """
         # Use dotenv_values instead of load_dotenv to get only variables from .env
-        return dict(dotenv_values())
+        # usecwd=True walks up from CWD (user's project) instead of from the
+        # package source file location, which matters for editable installs.
+        return dict(dotenv_values(find_dotenv(usecwd=True)))
 
     def get_env(self) -> Dict[str, str]:
         """
diff --git a/tests/unit/test_dotenv_loading.py b/tests/unit/test_dotenv_loading.py
index 90e2c903..1a080b1f 100644
--- a/tests/unit/test_dotenv_loading.py
+++ b/tests/unit/test_dotenv_loading.py
@@ -32,9 +32,9 @@ def test_dotenv_loads_before_imports(self):
         logger_import_line = None
 
         for i, line in enumerate(lines):
-            if "from dotenv import load_dotenv" in line:
+            if "from dotenv import" in line and "load_dotenv" in line:
                 dotenv_import_line = i
-            elif line.strip() == "load_dotenv()":
+            elif "load_dotenv(" in line.strip() and "import" not in line:
                 dotenv_call_line = i
             elif "from .logger import setup_logging" in line:
                 logger_import_line = i
@@ -331,8 +331,8 @@ def test_dotenv_import_present_in_init(self):
         content = init_file.read_text()
 
         # Verify dotenv is imported and called
-        assert "from dotenv import load_dotenv" in content
-        assert "load_dotenv()" in content
+        assert "from dotenv import find_dotenv, load_dotenv" in content
+        assert "load_dotenv(find_dotenv(usecwd=True))" in content
 
         # Verify dotenv is imported before any other module imports
         lines = content.split("\n")
@@ -341,7 +341,7 @@ def test_dotenv_import_present_in_init(self):
         ]
 
         # First import line should be the dotenv import
-        assert "from dotenv import load_dotenv" in import_lines[0]
+        assert "from dotenv import find_dotenv, load_dotenv" in import_lines[0]
 
     @patch.dict(os.environ, {}, clear=True)
     def test_clean_environment_dotenv_loading(self):

From b977bac885a692681a95915b28e8d6b297c75640 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dean=20Qui=C3=B1anola?= <dean.quinanola@runpod.io>
Date: Sat, 28 Feb 2026 10:18:25 -0800
Subject: [PATCH 2/6] fix: stop _do_deploy from mutating self.env to prevent
 false config drift

_do_deploy injected runtime env vars (RUNPOD_API_KEY, FLASH_MODULE_PATH,
FLASH_ENDPOINT_TYPE) directly into self.env, which is a hashed field.
This caused config drift detection on every subsequent deploy, triggering
unnecessary rolling releases.

Add _inject_template_env() helper that appends KeyValuePairs to
self.template.env instead. Runtime injections now go into the template
(which is excluded from hashing) while self.env stays clean for drift
detection.
---
 .../resources/load_balancer_sls_resource.py   |   7 +-
 src/runpod_flash/core/resources/serverless.py |  40 ++--
 tests/unit/resources/test_serverless.py       | 214 +++++++++++++++++-
 3 files changed, 238 insertions(+), 23 deletions(-)

diff --git a/src/runpod_flash/core/resources/load_balancer_sls_resource.py b/src/runpod_flash/core/resources/load_balancer_sls_resource.py
index ee0cf4ef..6ee725bd 100644
--- a/src/runpod_flash/core/resources/load_balancer_sls_resource.py
+++ b/src/runpod_flash/core/resources/load_balancer_sls_resource.py
@@ -255,10 +255,9 @@ async def _do_deploy(self) -> "LoadBalancerSlsResource":
             return self
 
         try:
-            # Mark this endpoint as load-balanced (triggers auto-provisioning on boot)
-            if self.env is None:
-                self.env = {}
-            self.env["FLASH_ENDPOINT_TYPE"] = "lb"
+            # Mark this endpoint as load-balanced (triggers auto-provisioning on boot).
+            # Injected into template.env (not self.env) to avoid false config drift.
+            self._inject_template_env("FLASH_ENDPOINT_TYPE", "lb")
 
             # Call parent deploy (creates endpoint via RunPod API)
             log.debug(f"Deploying LB endpoint: {self.name}")
diff --git a/src/runpod_flash/core/resources/serverless.py b/src/runpod_flash/core/resources/serverless.py
index 3300a741..487fef36 100644
--- a/src/runpod_flash/core/resources/serverless.py
+++ b/src/runpod_flash/core/resources/serverless.py
@@ -259,13 +259,12 @@ def validate_python_version(cls, v: Optional[str]) -> Optional[str]:
 
     @property
     def config_hash(self) -> str:
-        """Get config hash excluding env and runtime-assigned fields.
+        """Get config hash excluding runtime-assigned fields.
 
         Prevents false drift from:
-        - Dynamic env vars computed at runtime
         - Runtime-assigned fields (template, templateId, aiKey, userId, etc.)
 
-        Only hashes user-specified configuration, not server-assigned state.
+        Hashes user-specified configuration including env vars.
         """
         import hashlib
         import json
@@ -643,6 +642,21 @@ def _get_module_path(self) -> Optional[str]:
         except Exception:
             return None
 
+    def _inject_template_env(self, key: str, value: str) -> None:
+        """Append a KeyValuePair to self.template.env if the key isn't already present.
+
+        This injects runtime env vars directly into the template without
+        mutating self.env, which would cause false config drift on subsequent
+        deploys.
+        """
+        if self.template is None:
+            return
+        if self.template.env is None:
+            self.template.env = []
+        existing_keys = {kv.key for kv in self.template.env}
+        if key not in existing_keys:
+            self.template.env.append(KeyValuePair(key=key, value=value))
+
     async def _do_deploy(self) -> "DeployableResource":
         """
         Deploys the serverless resource using the provided configuration.
@@ -658,21 +672,19 @@ async def _do_deploy(self) -> "DeployableResource":
                 log.debug(f"{self} exists")
                 return self
 
-            # Inject API key for queue-based endpoints that make remote calls
+            # Inject API key for queue-based endpoints that make remote calls.
+            # Injected into template.env (not self.env) to avoid false config drift.
             if self.type == ServerlessType.QB:
-                env_dict = self.env or {}
-
-                # Check if this resource makes remote calls (from build manifest)
                 makes_remote_calls = self._check_makes_remote_calls()
 
                 if makes_remote_calls:
-                    # Inject RUNPOD_API_KEY if not already set
+                    env_dict = self.env or {}
                     if "RUNPOD_API_KEY" not in env_dict:
                         from runpod_flash.core.credentials import get_api_key
 
                         api_key = get_api_key()
                         if api_key:
-                            env_dict["RUNPOD_API_KEY"] = api_key
+                            self._inject_template_env("RUNPOD_API_KEY", api_key)
                             log.debug(
                                 f"{self.name}: Injected RUNPOD_API_KEY for remote calls "
                                 f"(makes_remote_calls=True)"
@@ -683,19 +695,15 @@ async def _do_deploy(self) -> "DeployableResource":
                                 f"Remote calls to other endpoints will fail."
                             )
 
-                self.env = env_dict
-
-            # Inject module path for load-balanced endpoints
+            # Inject module path for load-balanced endpoints.
+            # Injected into template.env (not self.env) to avoid false config drift.
             elif self.type == ServerlessType.LB:
                 env_dict = self.env or {}
-
                 module_path = self._get_module_path()
                 if module_path and "FLASH_MODULE_PATH" not in env_dict:
-                    env_dict["FLASH_MODULE_PATH"] = module_path
+                    self._inject_template_env("FLASH_MODULE_PATH", module_path)
                     log.debug(f"{self.name}: Injected FLASH_MODULE_PATH={module_path}")
 
-                self.env = env_dict
-
             # Ensure network volume is deployed first
             await self._ensure_network_volume_deployed()
 
diff --git a/tests/unit/resources/test_serverless.py b/tests/unit/resources/test_serverless.py
index 22bf4b34..0fe76ad3 100644
--- a/tests/unit/resources/test_serverless.py
+++ b/tests/unit/resources/test_serverless.py
@@ -22,7 +22,7 @@
 from runpod_flash.core.resources.gpu import GpuGroup
 from runpod_flash.core.resources.cpu import CpuInstanceType
 from runpod_flash.core.resources.network_volume import NetworkVolume, DataCenter
-from runpod_flash.core.resources.template import PodTemplate
+from runpod_flash.core.resources.template import KeyValuePair, PodTemplate
 
 
 class TestServerlessResource:
@@ -924,8 +924,6 @@ def test_serverless_endpoint_with_existing_template(self):
 
     def test_serverless_endpoint_template_env_override(self):
         """Test ServerlessEndpoint overrides template env vars."""
-        from runpod_flash.core.resources.template import PodTemplate, KeyValuePair
-
         template = PodTemplate(
             name="existing-template",
             imageName="test/image:v1",
@@ -1421,3 +1419,213 @@ def test_python_version_in_hashed_fields(self):
     def test_python_version_in_input_only(self):
         input_only = self._get_class_set("_input_only")
         assert "python_version" in input_only
+
+
+class TestInjectTemplateEnv:
+    """Test _inject_template_env helper and _do_deploy env non-mutation."""
+
+    def _make_resource_with_template(self, **overrides):
+        """Create a ServerlessEndpoint with a template for injection tests."""
+        defaults = {
+            "name": "inject-test",
+            "imageName": "test:latest",
+            "env": {"USER_VAR": "user_value"},
+            "flashboot": False,
+        }
+        defaults.update(overrides)
+        return ServerlessEndpoint(**defaults)
+
+    def test_inject_template_env_adds_key_value_pair(self):
+        """_inject_template_env adds a KeyValuePair to template.env."""
+        resource = self._make_resource_with_template()
+        assert resource.template is not None
+
+        original_len = len(resource.template.env)
+        resource._inject_template_env("NEW_KEY", "new_value")
+
+        assert len(resource.template.env) == original_len + 1
+        added = resource.template.env[-1]
+        assert added.key == "NEW_KEY"
+        assert added.value == "new_value"
+
+    def test_inject_template_env_is_idempotent(self):
+        """_inject_template_env does not add duplicate keys."""
+        resource = self._make_resource_with_template()
+        assert resource.template is not None
+
+        resource._inject_template_env("DEDUP_KEY", "first")
+        resource._inject_template_env("DEDUP_KEY", "second")
+
+        matching = [kv for kv in resource.template.env if kv.key == "DEDUP_KEY"]
+        assert len(matching) == 1
+        assert matching[0].value == "first"
+
+    def test_inject_template_env_skips_when_no_template(self):
+        """_inject_template_env is a no-op when template is None."""
+        resource = ServerlessResource(name="no-template")
+        resource.template = None
+
+        # Should not raise
+        resource._inject_template_env("KEY", "value")
+
+    def test_inject_template_env_initializes_empty_env_list(self):
+        """_inject_template_env handles template with None env list."""
+        resource = self._make_resource_with_template()
+        resource.template.env = None
+
+        resource._inject_template_env("INIT_KEY", "init_value")
+
+        assert len(resource.template.env) == 1
+        assert resource.template.env[0].key == "INIT_KEY"
+
+    @pytest.mark.asyncio
+    async def test_do_deploy_does_not_mutate_self_env(self):
+        """_do_deploy should not modify self.env (prevents false config drift)."""
+        resource = self._make_resource_with_template(
+            env={"LOG_LEVEL": "INFO"},
+        )
+        env_before = dict(resource.env)
+
+        mock_client = AsyncMock()
+        mock_client.save_endpoint = AsyncMock(
+            return_value={
+                "id": "endpoint-env-test",
+                "name": "inject-test",
+                "templateId": "tpl-env-test",
+                "gpuIds": "AMPERE_48",
+                "allowedCudaVersions": "",
+            }
+        )
+
+        with patch(
+            "runpod_flash.core.resources.serverless.RunpodGraphQLClient"
+        ) as mock_client_class:
+            mock_client_class.return_value.__aenter__.return_value = mock_client
+            mock_client_class.return_value.__aexit__.return_value = None
+
+            with patch.object(
+                ServerlessResource,
+                "_ensure_network_volume_deployed",
+                new=AsyncMock(),
+            ):
+                with patch.object(
+                    ServerlessResource, "is_deployed", return_value=False
+                ):
+                    with patch.object(
+                        ServerlessResource,
+                        "_check_makes_remote_calls",
+                        return_value=True,
+                    ):
+                        with patch.dict(os.environ, {"RUNPOD_API_KEY": "test-key-123"}):
+                            await resource._do_deploy()
+
+        assert resource.env == env_before
+
+    @pytest.mark.asyncio
+    async def test_do_deploy_injects_api_key_into_template_env(self):
+        """_do_deploy should inject RUNPOD_API_KEY into template.env for QB endpoints."""
+        resource = self._make_resource_with_template(
+            env={"LOG_LEVEL": "INFO"},
+        )
+
+        mock_client = AsyncMock()
+        mock_client.save_endpoint = AsyncMock(
+            return_value={
+                "id": "endpoint-inject-test",
+                "name": "inject-test",
+                "templateId": "tpl-inject-test",
+                "gpuIds": "AMPERE_48",
+                "allowedCudaVersions": "",
+            }
+        )
+
+        with patch(
+            "runpod_flash.core.resources.serverless.RunpodGraphQLClient"
+        ) as mock_client_class:
+            mock_client_class.return_value.__aenter__.return_value = mock_client
+            mock_client_class.return_value.__aexit__.return_value = None
+
+            with patch.object(
+                ServerlessResource,
+                "_ensure_network_volume_deployed",
+                new=AsyncMock(),
+            ):
+                with patch.object(
+                    ServerlessResource, "is_deployed", return_value=False
+                ):
+                    with patch.object(
+                        ServerlessResource,
+                        "_check_makes_remote_calls",
+                        return_value=True,
+                    ):
+                        with patch.dict(os.environ, {"RUNPOD_API_KEY": "test-key-456"}):
+                            await resource._do_deploy()
+
+        # The API key should have been in the payload sent to save_endpoint
+        # via the template env, not via self.env
+        payload = mock_client.save_endpoint.call_args.args[0]
+        template_env = payload.get("template", {}).get("env", [])
+        api_key_entries = [e for e in template_env if e["key"] == "RUNPOD_API_KEY"]
+        assert len(api_key_entries) == 1
+        assert api_key_entries[0]["value"] == "test-key-456"
+
+    @pytest.mark.asyncio
+    async def test_do_deploy_lb_injects_module_path_into_template_env(self):
+        """_do_deploy should inject FLASH_MODULE_PATH into template.env for LB endpoints."""
+        from runpod_flash.core.resources.load_balancer_sls_resource import (
+            LoadBalancerSlsResource,
+        )
+
+        resource = LoadBalancerSlsResource(
+            name="lb-inject-test",
+            imageName="test:latest",
+            env={"LOG_LEVEL": "INFO"},
+            flashboot=False,
+        )
+        env_before = dict(resource.env)
+
+        mock_client = AsyncMock()
+        mock_client.save_endpoint = AsyncMock(
+            return_value={
+                "id": "endpoint-lb-test",
+                "name": "lb-inject-test",
+                "templateId": "tpl-lb-test",
+                "gpuIds": "AMPERE_48",
+                "allowedCudaVersions": "",
+            }
+        )
+
+        with patch(
+            "runpod_flash.core.resources.serverless.RunpodGraphQLClient"
+        ) as mock_client_class:
+            mock_client_class.return_value.__aenter__.return_value = mock_client
+            mock_client_class.return_value.__aexit__.return_value = None
+
+            with patch.object(
+                ServerlessResource,
+                "_ensure_network_volume_deployed",
+                new=AsyncMock(),
+            ):
+                with patch.object(
+                    LoadBalancerSlsResource, "is_deployed", return_value=False
+                ):
+                    with patch.object(
+                        ServerlessResource,
+                        "_get_module_path",
+                        return_value="myapp.handler",
+                    ):
+                        await resource._do_deploy()
+
+        # self.env should not be mutated
+        assert resource.env == env_before
+
+        # FLASH_MODULE_PATH and FLASH_ENDPOINT_TYPE should be in template env
+        payload = mock_client.save_endpoint.call_args.args[0]
+        template_env = payload.get("template", {}).get("env", [])
+        module_entries = [e for e in template_env if e["key"] == "FLASH_MODULE_PATH"]
+        assert len(module_entries) == 1
+        assert module_entries[0]["value"] == "myapp.handler"
+
+        type_entries = [e for e in template_env if e["key"] == "FLASH_ENDPOINT_TYPE"]
+        assert len(type_entries) == 1
+        assert type_entries[0]["value"] == "lb"

From ed9eb1ccf0bb7042ff158a2f13aa676ec18b5e7f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dean=20Qui=C3=B1anola?= <dean.quinanola@runpod.io>
Date: Sat, 28 Feb 2026 11:18:41 -0800
Subject: [PATCH 3/6] fix: remove FLASH_ENDPOINT_TYPE injection from LB
 _do_deploy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The old self.env["FLASH_ENDPOINT_TYPE"] = "lb" was dead code — env is
in _input_only, so model_dump excluded it from the API payload. The
refactor to _inject_template_env made it actually reach the worker,
which triggered is_flash_deployment() -> maybe_unpack() -> artifact
not found error for flash run (live serverless) endpoints.

For flash deploy, the runtime resource_provisioner already sets
FLASH_ENDPOINT_TYPE=lb. This injection point is not needed.
---
 .../core/resources/load_balancer_sls_resource.py           | 7 ++++---
 tests/unit/resources/test_serverless.py                    | 7 ++++---
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/runpod_flash/core/resources/load_balancer_sls_resource.py b/src/runpod_flash/core/resources/load_balancer_sls_resource.py
index 6ee725bd..d31e2669 100644
--- a/src/runpod_flash/core/resources/load_balancer_sls_resource.py
+++ b/src/runpod_flash/core/resources/load_balancer_sls_resource.py
@@ -255,9 +255,10 @@ async def _do_deploy(self) -> "LoadBalancerSlsResource":
             return self
 
         try:
-            # Mark this endpoint as load-balanced (triggers auto-provisioning on boot).
-            # Injected into template.env (not self.env) to avoid false config drift.
-            self._inject_template_env("FLASH_ENDPOINT_TYPE", "lb")
+            # NOTE: FLASH_ENDPOINT_TYPE is NOT injected here. For flash deploy,
+            # the runtime resource_provisioner sets it. For flash run (live
+            # serverless), the worker must NOT see it — otherwise it triggers
+            # artifact unpacking which doesn't exist for live endpoints.
 
             # Call parent deploy (creates endpoint via RunPod API)
             log.debug(f"Deploying LB endpoint: {self.name}")
diff --git a/tests/unit/resources/test_serverless.py b/tests/unit/resources/test_serverless.py
index 0fe76ad3..61b0fe30 100644
--- a/tests/unit/resources/test_serverless.py
+++ b/tests/unit/resources/test_serverless.py
@@ -1619,13 +1619,14 @@ async def test_do_deploy_lb_injects_module_path_into_template_env(self):
         # self.env should not be mutated
         assert resource.env == env_before
 
-        # FLASH_MODULE_PATH and FLASH_ENDPOINT_TYPE should be in template env
+        # FLASH_MODULE_PATH should be in template env
         payload = mock_client.save_endpoint.call_args.args[0]
         template_env = payload.get("template", {}).get("env", [])
         module_entries = [e for e in template_env if e["key"] == "FLASH_MODULE_PATH"]
         assert len(module_entries) == 1
         assert module_entries[0]["value"] == "myapp.handler"
 
+        # FLASH_ENDPOINT_TYPE should NOT be injected here — it's set by the
+        # runtime resource_provisioner for flash deploy, not by _do_deploy
         type_entries = [e for e in template_env if e["key"] == "FLASH_ENDPOINT_TYPE"]
-        assert len(type_entries) == 1
-        assert type_entries[0]["value"] == "lb"
+        assert len(type_entries) == 0

From 6d3378661d6c3ed7743852e9f3e7fa5b9459fc90 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dean=20Qui=C3=B1anola?= <dean.quinanola@runpod.io>
Date: Sat, 28 Feb 2026 13:07:11 -0800
Subject: [PATCH 4/6] fix: skip env in template update when user env unchanged

When updating an endpoint, the saveTemplate mutation previously always
sent the user's env vars, which overwrote platform-injected vars like
PORT and PORT_HEALTH on LB endpoints. This triggered unnecessary
rolling releases.

Now _build_template_update_payload accepts skip_env; update() compares
old vs new env and omits env from the template payload when unchanged,
letting the platform preserve its injected vars.
---
 src/runpod_flash/core/resources/serverless.py |  25 +++-
 tests/unit/resources/test_serverless.py       | 134 ++++++++++++++++++
 2 files changed, 157 insertions(+), 2 deletions(-)

diff --git a/src/runpod_flash/core/resources/serverless.py b/src/runpod_flash/core/resources/serverless.py
index 487fef36..95757a95 100644
--- a/src/runpod_flash/core/resources/serverless.py
+++ b/src/runpod_flash/core/resources/serverless.py
@@ -533,12 +533,24 @@ def _payload_exclude(self) -> Set[str]:
 
     @staticmethod
     def _build_template_update_payload(
-        template: PodTemplate, template_id: str
+        template: PodTemplate,
+        template_id: str,
+        *,
+        skip_env: bool = False,
     ) -> Dict[str, Any]:
         """Build saveTemplate payload from template model.
 
         Keep this to fields supported by saveTemplate to avoid passing endpoint-only
         fields to the template mutation.
+
+        Args:
+            template: Template model with desired configuration.
+            template_id: ID of the template to update.
+            skip_env: When True, omit ``env`` from the payload so
+                saveTemplate preserves the existing template env vars.
+                This prevents removing platform-injected vars (e.g.
+                PORT, PORT_HEALTH on LB endpoints) when the user's
+                env hasn't actually changed.
         """
         template_data = template.model_dump(exclude_none=True, mode="json")
         allowed_fields = {
@@ -549,6 +561,8 @@ def _build_template_update_payload(
             "env",
             "readme",
         }
+        if skip_env:
+            allowed_fields.discard("env")
         payload = {
             key: value for key, value in template_data.items() if key in allowed_fields
         }
@@ -772,8 +786,15 @@ async def update(self, new_config: "ServerlessResource") -> "ServerlessResource"
 
                 if new_config.template:
                     if resolved_template_id:
+                        # Skip env in the template payload when the user's env
+                        # hasn't changed.  This lets the platform keep vars it
+                        # injected (e.g. PORT, PORT_HEALTH on LB endpoints)
+                        # and avoids a spurious rolling release.
+                        env_unchanged = self.env == new_config.env
                         template_payload = self._build_template_update_payload(
-                            new_config.template, resolved_template_id
+                            new_config.template,
+                            resolved_template_id,
+                            skip_env=env_unchanged,
                         )
                         await client.update_template(template_payload)
                         log.debug(
diff --git a/tests/unit/resources/test_serverless.py b/tests/unit/resources/test_serverless.py
index 61b0fe30..39de8f36 100644
--- a/tests/unit/resources/test_serverless.py
+++ b/tests/unit/resources/test_serverless.py
@@ -1630,3 +1630,137 @@ async def test_do_deploy_lb_injects_module_path_into_template_env(self):
         # runtime resource_provisioner for flash deploy, not by _do_deploy
         type_entries = [e for e in template_env if e["key"] == "FLASH_ENDPOINT_TYPE"]
         assert len(type_entries) == 0
+
+
+class TestBuildTemplateUpdatePayload:
+    """Test _build_template_update_payload skip_env behavior."""
+
+    def test_payload_includes_env_by_default(self):
+        """Template update payload includes env when skip_env is False."""
+        template = PodTemplate(
+            name="test-template",
+            imageName="test:latest",
+            env=[KeyValuePair(key="MY_VAR", value="my_val")],
+        )
+        payload = ServerlessResource._build_template_update_payload(template, "tpl-123")
+        assert "env" in payload
+        assert payload["env"] == [{"key": "MY_VAR", "value": "my_val"}]
+
+    def test_payload_excludes_env_when_skip_env_true(self):
+        """Template update payload omits env when skip_env is True.
+
+        This preserves platform-injected vars (e.g. PORT, PORT_HEALTH)
+        on the existing template.
+        """
+        template = PodTemplate(
+            name="test-template",
+            imageName="test:latest",
+            env=[KeyValuePair(key="MY_VAR", value="my_val")],
+        )
+        payload = ServerlessResource._build_template_update_payload(
+            template, "tpl-123", skip_env=True
+        )
+        assert "env" not in payload
+        # Other fields should still be present
+        assert payload["imageName"] == "test:latest"
+        assert payload["id"] == "tpl-123"
+
+    @pytest.mark.asyncio
+    async def test_update_skips_env_when_unchanged(self):
+        """update() omits env from template payload when env hasn't changed."""
+        env = {"LOG_LEVEL": "INFO"}
+        old_resource = ServerlessEndpoint(
+            name="update-test",
+            imageName="test:latest",
+            env=env,
+            flashboot=False,
+        )
+        old_resource.id = "ep-123"
+        old_resource.templateId = "tpl-123"
+
+        new_resource = ServerlessEndpoint(
+            name="update-test",
+            imageName="test:latest",
+            env=env,
+            flashboot=False,
+            workersMax=5,
+        )
+
+        mock_client = AsyncMock()
+        mock_client.save_endpoint = AsyncMock(
+            return_value={
+                "id": "ep-123",
+                "name": "update-test",
+                "templateId": "tpl-123",
+                "gpuIds": "AMPERE_48",
+                "allowedCudaVersions": "",
+            }
+        )
+        mock_client.update_template = AsyncMock(return_value={})
+
+        with patch(
+            "runpod_flash.core.resources.serverless.RunpodGraphQLClient"
+        ) as mock_client_class:
+            mock_client_class.return_value.__aenter__.return_value = mock_client
+            mock_client_class.return_value.__aexit__.return_value = None
+
+            with patch.object(
+                ServerlessResource,
+                "_ensure_network_volume_deployed",
+                new=AsyncMock(),
+            ):
+                await old_resource.update(new_resource)
+
+        # update_template was called, but env should NOT be in the payload
+        assert mock_client.update_template.called
+        template_payload = mock_client.update_template.call_args.args[0]
+        assert "env" not in template_payload
+
+    @pytest.mark.asyncio
+    async def test_update_includes_env_when_changed(self):
+        """update() includes env in template payload when env changed."""
+        old_resource = ServerlessEndpoint(
+            name="update-test",
+            imageName="test:latest",
+            env={"LOG_LEVEL": "INFO"},
+            flashboot=False,
+        )
+        old_resource.id = "ep-123"
+        old_resource.templateId = "tpl-123"
+
+        new_resource = ServerlessEndpoint(
+            name="update-test",
+            imageName="test:latest",
+            env={"LOG_LEVEL": "DEBUG", "NEW_VAR": "new_val"},
+            flashboot=False,
+        )
+
+        mock_client = AsyncMock()
+        mock_client.save_endpoint = AsyncMock(
+            return_value={
+                "id": "ep-123",
+                "name": "update-test",
+                "templateId": "tpl-123",
+                "gpuIds": "AMPERE_48",
+                "allowedCudaVersions": "",
+            }
+        )
+        mock_client.update_template = AsyncMock(return_value={})
+
+        with patch(
+            "runpod_flash.core.resources.serverless.RunpodGraphQLClient"
+        ) as mock_client_class:
+            mock_client_class.return_value.__aenter__.return_value = mock_client
+            mock_client_class.return_value.__aexit__.return_value = None
+
+            with patch.object(
+                ServerlessResource,
+                "_ensure_network_volume_deployed",
+                new=AsyncMock(),
+            ):
+                await old_resource.update(new_resource)
+
+        # update_template was called WITH env since it changed
+        assert mock_client.update_template.called
+        template_payload = mock_client.update_template.call_args.args[0]
+        assert "env" in template_payload

From c1f1ae39ba66690c94e5e9761a6885a56ea27149 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dean=20Qui=C3=B1anola?= <dean.quinanola@runpod.io>
Date: Sat, 28 Feb 2026 13:23:05 -0800
Subject: [PATCH 5/6] fix(serverless): correct stale inline comment in
 config_hash

The comment said env vars were excluded from the hash, but they are
included. The exclude set only contains RUNTIME_FIELDS and
EXCLUDED_HASH_FIELDS (id), not env.
---
 src/runpod_flash/core/resources/serverless.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/runpod_flash/core/resources/serverless.py b/src/runpod_flash/core/resources/serverless.py
index 95757a95..183d6834 100644
--- a/src/runpod_flash/core/resources/serverless.py
+++ b/src/runpod_flash/core/resources/serverless.py
@@ -271,7 +271,7 @@ def config_hash(self) -> str:
 
         resource_type = self.__class__.__name__
 
-        # Exclude runtime fields, env, and id from hash
+        # Exclude runtime fields and id from hash
         exclude_fields = (
             self.__class__.RUNTIME_FIELDS | self.__class__.EXCLUDED_HASH_FIELDS
         )

From e69e1c31fa9863d5b59eacf1aecd3739b9083bb9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dean=20Qui=C3=B1anola?= <dean.quinanola@runpod.io>
Date: Sun, 1 Mar 2026 22:17:52 -0800
Subject: [PATCH 6/6] fix(serverless): inject runtime vars on update to survive
 template env overwrite

Extract _inject_runtime_template_vars() from _do_deploy so both initial
deploy and update() paths inject RUNPOD_API_KEY and FLASH_MODULE_PATH
into template.env. Without this, runtime vars set during _do_deploy
were silently dropped when update() overwrote the template env on
config drift. Also preserve explicit template.env entries when env
dict is empty on both sides.
---
 src/runpod_flash/core/resources/serverless.py |  83 ++++----
 tests/unit/resources/test_serverless.py       | 178 ++++++++++++++++++
 2 files changed, 229 insertions(+), 32 deletions(-)

diff --git a/src/runpod_flash/core/resources/serverless.py b/src/runpod_flash/core/resources/serverless.py
index 183d6834..381c5363 100644
--- a/src/runpod_flash/core/resources/serverless.py
+++ b/src/runpod_flash/core/resources/serverless.py
@@ -671,6 +671,41 @@ def _inject_template_env(self, key: str, value: str) -> None:
         if key not in existing_keys:
             self.template.env.append(KeyValuePair(key=key, value=value))
 
+    def _inject_runtime_template_vars(self) -> None:
+        """Inject runtime env vars into template.env without mutating self.env.
+
+        For QB endpoints making remote calls: injects RUNPOD_API_KEY.
+        For LB endpoints: injects FLASH_MODULE_PATH.
+
+        Called by both _do_deploy (initial) and update (env changes) so
+        runtime vars survive template updates.
+        """
+        env_dict = self.env or {}
+
+        if self.type == ServerlessType.QB:
+            if self._check_makes_remote_calls():
+                if "RUNPOD_API_KEY" not in env_dict:
+                    from runpod_flash.core.credentials import get_api_key
+
+                    api_key = get_api_key()
+                    if api_key:
+                        self._inject_template_env("RUNPOD_API_KEY", api_key)
+                        log.debug(
+                            f"{self.name}: Injected RUNPOD_API_KEY for remote calls "
+                            f"(makes_remote_calls=True)"
+                        )
+                    else:
+                        log.warning(
+                            f"{self.name}: makes_remote_calls=True but RUNPOD_API_KEY not set. "
+                            f"Remote calls to other endpoints will fail."
+                        )
+
+        elif self.type == ServerlessType.LB:
+            module_path = self._get_module_path()
+            if module_path and "FLASH_MODULE_PATH" not in env_dict:
+                self._inject_template_env("FLASH_MODULE_PATH", module_path)
+                log.debug(f"{self.name}: Injected FLASH_MODULE_PATH={module_path}")
+
     async def _do_deploy(self) -> "DeployableResource":
         """
         Deploys the serverless resource using the provided configuration.
@@ -686,37 +721,7 @@ async def _do_deploy(self) -> "DeployableResource":
                 log.debug(f"{self} exists")
                 return self
 
-            # Inject API key for queue-based endpoints that make remote calls.
-            # Injected into template.env (not self.env) to avoid false config drift.
-            if self.type == ServerlessType.QB:
-                makes_remote_calls = self._check_makes_remote_calls()
-
-                if makes_remote_calls:
-                    env_dict = self.env or {}
-                    if "RUNPOD_API_KEY" not in env_dict:
-                        from runpod_flash.core.credentials import get_api_key
-
-                        api_key = get_api_key()
-                        if api_key:
-                            self._inject_template_env("RUNPOD_API_KEY", api_key)
-                            log.debug(
-                                f"{self.name}: Injected RUNPOD_API_KEY for remote calls "
-                                f"(makes_remote_calls=True)"
-                            )
-                        else:
-                            log.warning(
-                                f"{self.name}: makes_remote_calls=True but RUNPOD_API_KEY not set. "
-                                f"Remote calls to other endpoints will fail."
-                            )
-
-            # Inject module path for load-balanced endpoints.
-            # Injected into template.env (not self.env) to avoid false config drift.
-            elif self.type == ServerlessType.LB:
-                env_dict = self.env or {}
-                module_path = self._get_module_path()
-                if module_path and "FLASH_MODULE_PATH" not in env_dict:
-                    self._inject_template_env("FLASH_MODULE_PATH", module_path)
-                    log.debug(f"{self.name}: Injected FLASH_MODULE_PATH={module_path}")
+            self._inject_runtime_template_vars()
 
             # Ensure network volume is deployed first
             await self._ensure_network_volume_deployed()
@@ -790,11 +795,25 @@ async def update(self, new_config: "ServerlessResource") -> "ServerlessResource"
                         # hasn't changed.  This lets the platform keep vars it
                         # injected (e.g. PORT, PORT_HEALTH on LB endpoints)
                         # and avoids a spurious rolling release.
+                        #
+                        # Also check template.env: if env is empty but the
+                        # caller provided explicit template env entries, those
+                        # must not be silently dropped.
                         env_unchanged = self.env == new_config.env
+                        has_explicit_template_env = (
+                            not new_config.env and new_config.template.env is not None
+                        )
+                        skip_env = env_unchanged and not has_explicit_template_env
+
+                        if not skip_env:
+                            # Inject runtime vars (RUNPOD_API_KEY, FLASH_MODULE_PATH)
+                            # so they survive the template env overwrite.
+                            new_config._inject_runtime_template_vars()
+
                         template_payload = self._build_template_update_payload(
                             new_config.template,
                             resolved_template_id,
-                            skip_env=env_unchanged,
+                            skip_env=skip_env,
                         )
                         await client.update_template(template_payload)
                         log.debug(
diff --git a/tests/unit/resources/test_serverless.py b/tests/unit/resources/test_serverless.py
index 39de8f36..581dfdc8 100644
--- a/tests/unit/resources/test_serverless.py
+++ b/tests/unit/resources/test_serverless.py
@@ -1764,3 +1764,181 @@ async def test_update_includes_env_when_changed(self):
         assert mock_client.update_template.called
         template_payload = mock_client.update_template.call_args.args[0]
         assert "env" in template_payload
+
+    @pytest.mark.asyncio
+    async def test_update_injects_runtime_vars_when_env_changed(self):
+        """update() injects RUNPOD_API_KEY into template.env when env changed.
+
+        Without this, runtime-injected vars (set during _do_deploy) would be
+        lost when update() overwrites the template env.
+        """
+        old_resource = ServerlessEndpoint(
+            name="update-inject-test",
+            imageName="test:latest",
+            env={"LOG_LEVEL": "INFO"},
+            flashboot=False,
+        )
+        old_resource.id = "ep-inject"
+        old_resource.templateId = "tpl-inject"
+
+        new_resource = ServerlessEndpoint(
+            name="update-inject-test",
+            imageName="test:latest",
+            env={"LOG_LEVEL": "DEBUG"},
+            flashboot=False,
+        )
+
+        mock_client = AsyncMock()
+        mock_client.save_endpoint = AsyncMock(
+            return_value={
+                "id": "ep-inject",
+                "name": "update-inject-test",
+                "templateId": "tpl-inject",
+                "gpuIds": "AMPERE_48",
+                "allowedCudaVersions": "",
+            }
+        )
+        mock_client.update_template = AsyncMock(return_value={})
+
+        with patch(
+            "runpod_flash.core.resources.serverless.RunpodGraphQLClient"
+        ) as mock_client_class:
+            mock_client_class.return_value.__aenter__.return_value = mock_client
+            mock_client_class.return_value.__aexit__.return_value = None
+
+            with patch.object(
+                ServerlessResource,
+                "_ensure_network_volume_deployed",
+                new=AsyncMock(),
+            ):
+                with patch.object(
+                    ServerlessResource,
+                    "_check_makes_remote_calls",
+                    return_value=True,
+                ):
+                    with patch.dict(os.environ, {"RUNPOD_API_KEY": "inject-key"}):
+                        await old_resource.update(new_resource)
+
+        template_payload = mock_client.update_template.call_args.args[0]
+        env_entries = template_payload.get("env", [])
+        api_key_entries = [e for e in env_entries if e["key"] == "RUNPOD_API_KEY"]
+        assert len(api_key_entries) == 1
+        assert api_key_entries[0]["value"] == "inject-key"
+
+    @pytest.mark.asyncio
+    async def test_update_skips_runtime_injection_when_env_unchanged(self):
+        """update() does not inject runtime vars when env is unchanged.
+
+        When skip_env=True, the template env payload is omitted entirely,
+        so runtime vars already on the platform are preserved as-is.
+        """
+        env = {"LOG_LEVEL": "INFO"}
+        old_resource = ServerlessEndpoint(
+            name="update-no-inject",
+            imageName="test:latest",
+            env=env,
+            flashboot=False,
+        )
+        old_resource.id = "ep-no-inject"
+        old_resource.templateId = "tpl-no-inject"
+
+        new_resource = ServerlessEndpoint(
+            name="update-no-inject",
+            imageName="test:latest",
+            env=env,
+            flashboot=False,
+        )
+
+        mock_client = AsyncMock()
+        mock_client.save_endpoint = AsyncMock(
+            return_value={
+                "id": "ep-no-inject",
+                "name": "update-no-inject",
+                "templateId": "tpl-no-inject",
+                "gpuIds": "AMPERE_48",
+                "allowedCudaVersions": "",
+            }
+        )
+        mock_client.update_template = AsyncMock(return_value={})
+
+        with patch(
+            "runpod_flash.core.resources.serverless.RunpodGraphQLClient"
+        ) as mock_client_class:
+            mock_client_class.return_value.__aenter__.return_value = mock_client
+            mock_client_class.return_value.__aexit__.return_value = None
+
+            with patch.object(
+                ServerlessResource,
+                "_ensure_network_volume_deployed",
+                new=AsyncMock(),
+            ):
+                with patch.object(
+                    ServerlessResource,
+                    "_check_makes_remote_calls",
+                    return_value=True,
+                ):
+                    with patch.dict(os.environ, {"RUNPOD_API_KEY": "inject-key"}):
+                        await old_resource.update(new_resource)
+
+        # env should be omitted from template payload (skip_env=True)
+        template_payload = mock_client.update_template.call_args.args[0]
+        assert "env" not in template_payload
+
+    @pytest.mark.asyncio
+    async def test_update_includes_env_for_explicit_template_env(self):
+        """update() sends env when caller provides explicit template.env with empty env.
+
+        Even if self.env == new_config.env (both empty), explicit template.env
+        entries must not be silently dropped.
+        """
+        old_resource = ServerlessEndpoint(
+            name="update-tpl-env",
+            imageName="test:latest",
+            env={},
+            flashboot=False,
+        )
+        old_resource.id = "ep-tpl-env"
+        old_resource.templateId = "tpl-tpl-env"
+
+        new_resource = ServerlessEndpoint(
+            name="update-tpl-env",
+            imageName="test:latest",
+            env={},
+            flashboot=False,
+            template=PodTemplate(
+                name="explicit-tpl",
+                imageName="test:latest",
+                env=[KeyValuePair(key="EXPLICIT_VAR", value="explicit_val")],
+            ),
+        )
+
+        mock_client = AsyncMock()
+        mock_client.save_endpoint = AsyncMock(
+            return_value={
+                "id": "ep-tpl-env",
+                "name": "update-tpl-env",
+                "templateId": "tpl-tpl-env",
+                "gpuIds": "AMPERE_48",
+                "allowedCudaVersions": "",
+            }
+        )
+        mock_client.update_template = AsyncMock(return_value={})
+
+        with patch(
+            "runpod_flash.core.resources.serverless.RunpodGraphQLClient"
+        ) as mock_client_class:
+            mock_client_class.return_value.__aenter__.return_value = mock_client
+            mock_client_class.return_value.__aexit__.return_value = None
+
+            with patch.object(
+                ServerlessResource,
+                "_ensure_network_volume_deployed",
+                new=AsyncMock(),
+            ):
+                await old_resource.update(new_resource)
+
+        template_payload = mock_client.update_template.call_args.args[0]
+        assert "env" in template_payload
+        env_entries = template_payload["env"]
+        explicit = [e for e in env_entries if e["key"] == "EXPLICIT_VAR"]
+        assert len(explicit) == 1