From 7488ec0a7f46bc631cf7c0c50fbf373fb7165831 Mon Sep 17 00:00:00 2001
From: Nan Liao <nanliao@google.com>
Date: Sat, 23 May 2026 00:02:44 +0000
Subject: [PATCH 1/2] kaggle benchmarks init

---
 src/kaggle/api/kaggle_api_extended.py  | 104 ++++++++++++++++++++-----
 src/kaggle/test/test_benchmarks_cli.py |  63 ++++++++++++---
 2 files changed, 138 insertions(+), 29 deletions(-)

diff --git a/src/kaggle/api/kaggle_api_extended.py b/src/kaggle/api/kaggle_api_extended.py
index c182d432..385b681b 100644
--- a/src/kaggle/api/kaggle_api_extended.py
+++ b/src/kaggle/api/kaggle_api_extended.py
@@ -7022,55 +7022,104 @@ def _fetch_model_proxy_env(self):
             "MODEL_PROXY_EXPIRY_TIME": response.expiry_time.isoformat() + "Z" if response.expiry_time else "",
         }
 
-    def _write_benchmarks_env(self, env_vars, no_confirm, env_file):
-        env_file = os.path.abspath(env_file)
+    def _write_benchmarks_env(self, env_vars, no_confirm, env_file, quiet=False):  # True if written, else False
+        env_file_abs = os.path.abspath(env_file)
+
+        print("The following configuration will be set:")
+
         api_key = env_vars.get("MODEL_PROXY_API_KEY", "")
-        masked_api_key = "****************" + api_key[-4:] if len(api_key) > 4 else api_key
+        if api_key:
+            print(f"  API Key  (ends in ...{api_key[-4:]})")  # at most the last four characters are echoed
+        expiry_iso = env_vars.get("MODEL_PROXY_EXPIRY_TIME", "")
+        if expiry_iso:
+            print(f"  Expires: {self._format_expiry(expiry_iso)}")
+
+        label_width = 17  # width of the label column; values are printed right after it
+        defaults = [
+            ("Default LLM", env_vars.get("LLM_DEFAULT")),
+            ("Default Judge", env_vars.get("LLM_DEFAULT_EVAL")),
+        ]
+        defaults = [(label, value) for label, value in defaults if value]  # skip unset or empty entries
+        for i, (label, value) in enumerate(defaults):
+            prefix = "\n" if i == 0 else ""  # blank line before the first row only
+            print(f"{prefix}  {label.ljust(label_width)}{value}")
+
+        llms_available = env_vars.get("LLMS_AVAILABLE", "")
+        if llms_available:
+            llms = [llm.strip() for llm in llms_available.split(",") if llm.strip()]  # drop blank entries
+            if llms:
+                label = "LLMs Available".ljust(label_width)
+                print(f"\n  {label}{llms[0]}")
+                continuation = " " * (2 + label_width)  # indent that lines later names up under the first
+                for llm in llms[1:]:
+                    print(f"{continuation}{llm}")
 
-        print(f"The following environment variables will be written to {env_file}:\n")
-        for key, value in env_vars.items():
-            display_value = masked_api_key if key == "MODEL_PROXY_API_KEY" else value
-            print(f"  {key}={display_value}")
         print()
 
         if not no_confirm:
-            if not self.confirmation(f"write these environment variables to {env_file}", default_to_yes=True):
-                return
+            if not self.confirmation(f"write these settings to {os.path.basename(env_file_abs)}", default_to_yes=True):
+                return False  # declined: nothing is written
 
-        with open(env_file, "a") as f:
+        with open(env_file_abs, "a") as f:  # append mode keeps whatever the file already holds
             f.write("\n")
             for key, value in env_vars.items():
                 f.write(f"{key}={value}\n")
 
-        print(f"Environment variables have been written to {env_file}.")
+        if not quiet:  # quiet only hides this notice; the summary above always prints
+            print(f"Environment variables have been written to {env_file_abs}.")
+        return True
 
-    def _write_benchmarks_example(self, example_file):
+    @staticmethod
+    def _format_expiry(iso_timestamp):
+        """Return "Expired" or "In N minute(s)/hour(s)/day(s)" for an ISO-8601 expiry; unparseable input is echoed."""
+        try:
+            expiry = datetime.fromisoformat(iso_timestamp.rstrip("Z"))  # trailing "Z" is rejected before Python 3.11
+        except ValueError:
+            return iso_timestamp
+        if expiry.tzinfo is None:  # naive means UTC here; an explicit offset must be kept, not overwritten
+            expiry = expiry.replace(tzinfo=timezone.utc)
+        total = int((expiry - datetime.now(timezone.utc)).total_seconds())
+        if total <= 0:
+            return "Expired"
+        if total < 3600:
+            n, unit = max(1, total // 60), "minute"  # under a minute still reads "In 1 minute"
+        else:
+            n, unit = (total // 3600, "hour") if total < 86400 else (total // 86400, "day")
+        return f"In {n} {unit}{'s' if n != 1 else ''}"
+
+    def _write_benchmarks_example(self, example_file, quiet=False):  # quiet silences both status messages
         example_file = os.path.abspath(example_file)
         if os.path.exists(example_file):
-            print(f"Example file already exists at {example_file}, skipping.")
+            if not quiet:  # an existing file is left untouched either way
+                print(f"Example file already exists at {example_file}, skipping.")
             return
 
         with open(example_file, "w") as f:
             f.write(BENCHMARKS_EXAMPLE_TASK)
 
-        print(f"Example benchmark task file has been written to {example_file}.")
+        if not quiet:
+            print(f"Example benchmark task file has been written to {example_file}.")
 
-    def _write_benchmarks_reference(self, directory):
+    def _write_benchmarks_reference(self, directory, quiet=False):  # quiet silences both status messages
         ref_file = os.path.join(os.path.abspath(directory), "kaggle_benchmarks_reference.md")
         if os.path.exists(ref_file):
-            print(f"Reference file already exists at {ref_file}, skipping.")
+            if not quiet:  # an existing file is left untouched either way
+                print(f"Reference file already exists at {ref_file}, skipping.")
             return
 
         with open(ref_file, "w") as f:
             f.write(BENCHMARKS_SYNTAX_REF)
 
-        print(f"Syntax reference has been written to {ref_file}.")
+        if not quiet:
+            print(f"Syntax reference has been written to {ref_file}.")
 
     def benchmarks_auth_cli(self, no_confirm=False, env_file=".env"):
         env_vars = self._fetch_model_proxy_env()
         self._write_benchmarks_env(env_vars, no_confirm, env_file)
 
     def benchmarks_init_cli(self, no_confirm=False, env_file=".env", example_file="example_task.py"):
+        print("Initializing Kaggle Benchmarks environment")
+        print(f"  Target:  {os.path.abspath(env_file)}\n")
         env_vars = self._fetch_model_proxy_env()
         env_vars.update(
             {
@@ -7079,9 +7128,24 @@ def benchmarks_init_cli(self, no_confirm=False, env_file=".env", example_file="e
                 "LLMS_AVAILABLE": "anthropic/claude-haiku-4-5@20251001,deepseek-ai/deepseek-v3.2,google/gemini-3-flash-preview,google/gemini-3.1-flash-lite-preview,openai/gpt-oss-120b,qwen/qwen3-next-80b-a3b-instruct,zai/glm-5",
             }
         )
-        self._write_benchmarks_env(env_vars, no_confirm, env_file)
-        self._write_benchmarks_example(example_file)
-        self._write_benchmarks_reference(os.path.dirname(os.path.abspath(example_file)))
+        if not self._write_benchmarks_env(env_vars, no_confirm, env_file, quiet=True):
+            return  # user declined the env-file write, so skip the remaining files and the summary
+        self._write_benchmarks_example(example_file, quiet=True)
+        self._write_benchmarks_reference(os.path.dirname(os.path.abspath(example_file)), quiet=True)
+
+        env_name = os.fspath(env_file)  # paths exactly as given, so they resolve from the current directory
+        example_name = os.fspath(example_file)
+        ref_name = os.path.join(os.path.dirname(example_name), "kaggle_benchmarks_reference.md")
+        col_width = max(len(env_name), len(example_name), len(ref_name)) + 3
+
+        print("\n✅ Environment initialized!")
+        print("\nFiles created in ./:")
+        print(f"  {env_name.ljust(col_width)}(API keys & configuration)")
+        print(f"  {example_name.ljust(col_width)}(Starter template)")
+        print(f"  {ref_name.ljust(col_width)}(Syntax guide)")
+        print("\nNext step:")
+        print("  Run your first task using the example file:")
+        print(f"  $ kaggle b t push what-is-kaggle -f {example_name} --wait")
 
     def benchmarks_tasks_push_cli(self, task, file, wait=None, poll_interval=60, verbose=False):
         if poll_interval is not None and poll_interval <= 0:
diff --git a/src/kaggle/test/test_benchmarks_cli.py b/src/kaggle/test/test_benchmarks_cli.py
index eb96e37b..5c08d2f8 100644
--- a/src/kaggle/test/test_benchmarks_cli.py
+++ b/src/kaggle/test/test_benchmarks_cli.py
@@ -1427,7 +1427,7 @@ def test_writes_env_file_with_yes_flag(self, api, mock_token, capsys, tmp_path):
         assert "MODEL_PROXY_API_KEY=kaggle-benchmarks:cool-token\n" in content
         assert "MODEL_PROXY_EXPIRY_TIME=2026-04-17T12:00:00Z\n" in content
         out = capsys.readouterr().out
-        assert "MODEL_PROXY_API_KEY=****************oken" in out
+        assert "API Key  (ends in ...oken)" in out
         assert "kaggle-benchmarks:cool-token" not in out
         assert "have been written to" in out
 
@@ -1437,7 +1437,7 @@ def test_aborted_on_no_confirm(self, api, mock_token, capsys, tmp_path):
             api.benchmarks_auth_cli(no_confirm=False, env_file=env_file)
         assert not (tmp_path / ".env").exists()
         out = capsys.readouterr().out
-        assert "MODEL_PROXY_URL" in out
+        assert "The following configuration will be set:" in out
         assert "have been written to" not in out
 
     def test_confirmed_on_yes(self, api, mock_token, capsys, tmp_path):
@@ -1487,9 +1487,9 @@ def test_writes_all_vars(self, api, mock_token, capsys, tmp_path):
             in content
         )
         out = capsys.readouterr().out
-        assert "MODEL_PROXY_API_KEY=****************oken" in out
-        assert "LLM_DEFAULT=google/gemini-3-flash-preview" in out
-        assert "have been written to" in out
+        assert "API Key  (ends in ...oken)" in out
+        assert "Default LLM      google/gemini-3-flash-preview" in out
+        assert "Environment initialized!" in out
 
     def test_writes_example_file(self, api, mock_token, capsys, tmp_path):
         env_file = str(tmp_path / ".env")
@@ -1499,7 +1499,8 @@ def test_writes_example_file(self, api, mock_token, capsys, tmp_path):
         assert "import kaggle_benchmarks as kbench" in content
         assert "kaggle_benchmarks_reference.md" in content
         out = capsys.readouterr().out
-        assert "Example benchmark task file has been written to" in out
+        assert "example_task.py" in out
+        assert "Starter template" in out
 
     def test_writes_reference_file(self, api, mock_token, capsys, tmp_path):
         env_file = str(tmp_path / ".env")
@@ -1510,8 +1511,8 @@ def test_writes_reference_file(self, api, mock_token, capsys, tmp_path):
         content = ref_file.read_text()
         assert "kaggle-benchmarks Task Syntax Reference" in content
         out = capsys.readouterr().out
-        assert "Syntax reference has been written to" in out
         assert "kaggle_benchmarks_reference.md" in out
+        assert "Syntax guide" in out
 
     def test_skips_reference_file_if_exists(self, api, mock_token, capsys, tmp_path):
         ref_file = tmp_path / "kaggle_benchmarks_reference.md"
@@ -1521,7 +1522,7 @@ def test_skips_reference_file_if_exists(self, api, mock_token, capsys, tmp_path)
         api.benchmarks_init_cli(no_confirm=True, env_file=env_file, example_file=str(example_file))
         assert ref_file.read_text() == "existing content\n"
         out = capsys.readouterr().out
-        assert "Reference file already exists" in out
+        assert "Environment initialized!" in out
 
     def test_skips_example_file_if_exists(self, api, mock_token, capsys, tmp_path):
         example_file = tmp_path / "example_task.py"
@@ -1530,7 +1531,7 @@ def test_skips_example_file_if_exists(self, api, mock_token, capsys, tmp_path):
         api.benchmarks_init_cli(no_confirm=True, env_file=env_file, example_file=str(example_file))
         assert example_file.read_text() == "existing content\n"
         out = capsys.readouterr().out
-        assert "already exists" in out
+        assert "Environment initialized!" in out
 
     def test_custom_example_file(self, api, mock_token, capsys, tmp_path):
         env_file = str(tmp_path / ".env")
@@ -1556,6 +1557,50 @@ def test_appends_to_existing_file(self, api, mock_token, capsys, tmp_path):
         assert "LLM_DEFAULT=google/gemini-3-flash-preview\n" in content
 
 
+# ============================================================
+# Expiry Formatting
+# ============================================================
+
+
+class TestFormatExpiry:
+    """Tests for ``KaggleApi._format_expiry`` static helper, run against a frozen clock."""
+
+    _NOW = "2026-05-23T12:00:00"  # frozen "now" in UTC
+
+    @pytest.fixture(autouse=True)
+    def _freeze_now(self):  # autouse: every test in this class sees the same clock
+        from datetime import datetime as real_datetime, timezone
+
+        frozen = real_datetime.fromisoformat(self._NOW).replace(tzinfo=timezone.utc)
+
+        class FrozenDatetime(real_datetime):  # only now() is overridden; everything else is inherited
+            @classmethod
+            def now(cls, tz=None):
+                return frozen if tz is None else frozen.astimezone(tz)
+
+        with patch("kaggle.api.kaggle_api_extended.datetime", FrozenDatetime):  # replaces the module's datetime
+            yield
+
+    @pytest.mark.parametrize(
+        "expiry_iso, expected",
+        [
+            ("2026-05-23T11:00:00Z", "Expired"),  # one hour in the past
+            ("2026-05-23T12:00:00Z", "Expired"),  # exactly now: zero seconds left counts as expired
+            ("2026-05-23T12:00:30Z", "In 1 minute"),  # under a minute is rounded up to 1
+            ("2026-05-23T12:05:00Z", "In 5 minutes"),
+            ("2026-05-23T13:00:00Z", "In 1 hour"),  # 3600 s: first value in the hour bucket
+            ("2026-05-24T00:00:00Z", "In 12 hours"),
+            ("2026-05-24T12:00:00Z", "In 1 day"),  # 86400 s: first value in the day bucket
+            ("2026-05-30T12:00:00Z", "In 7 days"),
+        ],
+    )
+    def test_relative_buckets(self, expiry_iso, expected):
+        assert KaggleApi._format_expiry(expiry_iso) == expected
+
+    def test_unparseable_falls_back_to_raw(self):
+        assert KaggleApi._format_expiry("not-a-timestamp") == "not-a-timestamp"
+
+
 # ============================================================
 # Task Name Detection
 # ============================================================

From 137eb5aa0debe7ad01e6ed0d5c7013298a294b39 Mon Sep 17 00:00:00 2001
From: Nan Liao <nanliao@google.com>
Date: Sat, 23 May 2026 01:16:10 +0000
Subject: [PATCH 2/2] push command

---
 src/kaggle/api/kaggle_api_extended.py  | 35 +++++++++++++++++---------
 src/kaggle/test/test_benchmarks_cli.py | 16 ++++++------
 2 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/src/kaggle/api/kaggle_api_extended.py b/src/kaggle/api/kaggle_api_extended.py
index 385b681b..7cf5ea7a 100644
--- a/src/kaggle/api/kaggle_api_extended.py
+++ b/src/kaggle/api/kaggle_api_extended.py
@@ -6952,8 +6952,7 @@ def _adaptive_sleep(current_interval, poll_interval, verbose=False):
         return min(poll_interval, int(current_interval * 1.5))
 
     def _poll_task_creation(self, kaggle, task, wait, poll_interval, verbose=False):
-        """Poll task creation status until terminal or timeout."""
-        print("Waiting for task to be processed...")
+        """Poll task creation. Returns True on completion, False on timeout; raises ValueError on failure."""
         start_time = time.time()
         current_interval = min(self._ADAPTIVE_POLL_START, poll_interval)
         while True:
@@ -6961,8 +6960,7 @@ def _poll_task_creation(self, kaggle, task, wait, poll_interval, verbose=False):
             state = task_info.creation_state
 
             if state == BenchmarkTaskVersionCreationState.BENCHMARK_TASK_VERSION_CREATION_STATE_COMPLETED:
-                print(f"Task '{task}' creation completed.")
-                return
+                return True  # the caller prints the completion banner
             elif state not in self._PENDING_CREATION_STATES:
                 error_msg = f"Task '{task}' creation failed with status: {self._clean_enum_str(state)}"
                 error = getattr(task_info, "error", None) or getattr(task_info, "creation_error_message", None)
@@ -6970,11 +6968,11 @@ def _poll_task_creation(self, kaggle, task, wait, poll_interval, verbose=False):
                     error_msg += f" Error: {error}"
                 raise ValueError(error_msg)
 
-            print(f"  Task status: {self._clean_enum_str(state)}...")
+            print(f"   Task status: {self._clean_enum_str(state)}...")
 
             if wait > 0 and (time.time() - start_time) > wait:
                 print(f"Timed out waiting for task creation after {wait} seconds.")
-                return
+                return False  # gave up waiting; the task may still be pending
 
             current_interval = self._adaptive_sleep(current_interval, poll_interval, verbose)
 
@@ -7172,6 +7170,7 @@ def benchmarks_tasks_push_cli(self, task, file, wait=None, poll_interval=60, ver
         with self.build_kaggle_client() as kaggle:
             # If a previous push is still being created, wait or error.
             task_info = self._get_benchmark_task(task_slug, kaggle, allow_not_found=True)
+            is_new_version = task_info is not None
             if task_info and task_info.creation_state in self._PENDING_CREATION_STATES:
                 if wait is None:
                     raise ValueError(
@@ -7180,7 +7179,6 @@ def benchmarks_tasks_push_cli(self, task, file, wait=None, poll_interval=60, ver
                     )
                 print(f"Task '{task_slug}' is already being created. Waiting for it to finish...")
                 self._poll_task_creation(kaggle, task_slug, wait, poll_interval, verbose=verbose)
-                print(f"Pushing new version of '{task_slug}'...")
 
             request = ApiCreateBenchmarkTaskRequest()
             request.slug = task_slug
@@ -7192,14 +7190,27 @@ def benchmarks_tasks_push_cli(self, task, file, wait=None, poll_interval=60, ver
                 raise ValueError(f"Failed to push task: {error}")
 
             url = self._full_task_url(response.url)
-            print(f"Task '{task_slug}' pushed.")
-            print(f"\033[1mTask URL: {url}\033[0m")
-            print(f"To run this task against models, use: kaggle b t run {task_slug}")
+            model_output_url = re.sub(r"/\d+/?$", "", url) + "?compare=true"
+            banner_subject = f"new version of {task_slug}" if is_new_version else task_slug
+            print(f"\n🚀 Pushed {banner_subject}")
+            print(f"   Task Details:  {url}")
 
             if wait is None:
-                print(f"To check creation status, use: kaggle b t status {task_slug}")
+                print(f"   Model Output:  {model_output_url}")
+                print("\nNext steps:")
+                print("   Check creation status:")
+                print(f"   kaggle b t status {task_slug}\n")
+                print("   Select models to run (or use --models to skip the menu):")
+                print(f"   kaggle b t run {task_slug}")
             else:
-                self._poll_task_creation(kaggle, task_slug, wait, poll_interval, verbose=verbose)
+                print("\n⏳ Status")
+                completed = self._poll_task_creation(kaggle, task_slug, wait, poll_interval, verbose=verbose)
+                if completed:
+                    print("\n✅ Completed")
+                    print(f"   Model Output:  {model_output_url}")
+                    print("\nNext step:")
+                    print("   Select models to run (or use --models to skip the menu):")
+                    print(f"   kaggle b t run {task_slug}")
 
     def benchmarks_tasks_run_cli(self, task, model=None, wait=None, poll_interval=60, verbose=False):
         if poll_interval is not None and poll_interval <= 0:
diff --git a/src/kaggle/test/test_benchmarks_cli.py b/src/kaggle/test/test_benchmarks_cli.py
index 5c08d2f8..277291c0 100644
--- a/src/kaggle/test/test_benchmarks_cli.py
+++ b/src/kaggle/test/test_benchmarks_cli.py
@@ -242,6 +242,7 @@ def test_push_rejects_invalid_input(self, api, tmp_path, task, filename, content
     def test_push_creates_task(self, api, tmp_path, capsys, content, task_name, expected_slug):
         """Push converts .py -> ipynb via jupytext and creates the task."""
         filepath = _write_task_file(tmp_path, content)
+        api._mock_benchmarks.get_benchmark_task.side_effect = HTTPError(response=MagicMock(status_code=404))
         _setup_create_response(api, task_name)
 
         jt = _push(api, task_name, filepath)
@@ -255,8 +256,9 @@ def test_push_creates_task(self, api, tmp_path, capsys, content, task_name, expe
 
         captured = capsys.readouterr()
         output = captured.out
-        assert f"Task '{expected_slug}' pushed." in output
-        assert "Task URL:" in output
+        assert f"🚀 Pushed {expected_slug}" in output
+        assert "Task Details:" in output
+        assert "Model Output:" in output
         assert f"kaggle b t run {expected_slug}" in output
         # When the original name differs from the slug, a normalization warning is printed to stderr.
         if task_name != expected_slug:
@@ -269,7 +271,7 @@ def test_push_creates_new_task_without_prompting(self, api, tmp_path, capsys, st
         api._mock_benchmarks.get_benchmark_task.side_effect = HTTPError(response=MagicMock(status_code=status_code))
         _setup_create_response(api)
         _push(api, "my-task", filepath)
-        assert "Task 'my-task' pushed." in capsys.readouterr().out
+        assert "🚀 Pushed my-task" in capsys.readouterr().out
 
     def test_push_prefixes_relative_url(self, api, tmp_path, capsys):
         """If url starts with '/', prefix https://www.kaggle.com."""
@@ -313,8 +315,7 @@ def test_push_wait_monitors_pending_then_pushes(self, api, capsys, tmp_path, sta
 
         output = capsys.readouterr().out
         assert "already being created" in output
-        assert "Pushing new version of 'my-task'" in output
-        assert "Task 'my-task' pushed." in output
+        assert "🚀 Pushed new version of my-task" in output
         # Verify the create API was still called (new version pushed)
         api._mock_benchmarks.create_benchmark_task.assert_called_once()
 
@@ -351,8 +352,9 @@ def test_push_wait_polls_until_completion(self, api, capsys, tmp_path):
             api.benchmarks_tasks_push_cli("my-task", filepath, wait=0)
 
         output = capsys.readouterr().out
-        assert "Waiting for task to be processed" in output
-        assert "Task 'my-task' creation completed." in output
+        assert "⏳ Status" in output
+        assert "✅ Completed" in output
+        assert "Model Output:" in output
 
     def test_push_adaptive_polling(self, api, tmp_path):
         filepath = _write_task_file(tmp_path)