From 7488ec0a7f46bc631cf7c0c50fbf373fb7165831 Mon Sep 17 00:00:00 2001 From: Nan Liao Date: Sat, 23 May 2026 00:02:44 +0000 Subject: [PATCH 1/2] kaggle benchmarks init --- src/kaggle/api/kaggle_api_extended.py | 104 ++++++++++++++++++++----- src/kaggle/test/test_benchmarks_cli.py | 63 ++++++++++++--- 2 files changed, 138 insertions(+), 29 deletions(-) diff --git a/src/kaggle/api/kaggle_api_extended.py b/src/kaggle/api/kaggle_api_extended.py index c182d432..385b681b 100644 --- a/src/kaggle/api/kaggle_api_extended.py +++ b/src/kaggle/api/kaggle_api_extended.py @@ -7022,55 +7022,104 @@ def _fetch_model_proxy_env(self): "MODEL_PROXY_EXPIRY_TIME": response.expiry_time.isoformat() + "Z" if response.expiry_time else "", } - def _write_benchmarks_env(self, env_vars, no_confirm, env_file): - env_file = os.path.abspath(env_file) + def _write_benchmarks_env(self, env_vars, no_confirm, env_file, quiet=False): + env_file_abs = os.path.abspath(env_file) + + print("The following configuration will be set:") + api_key = env_vars.get("MODEL_PROXY_API_KEY", "") - masked_api_key = "****************" + api_key[-4:] if len(api_key) > 4 else api_key + if api_key: + print(f" API Key (ends in ...{api_key[-4:]})") + expiry_iso = env_vars.get("MODEL_PROXY_EXPIRY_TIME", "") + if expiry_iso: + print(f" Expires: {self._format_expiry(expiry_iso)}") + + label_width = 17 + defaults = [ + ("Default LLM", env_vars.get("LLM_DEFAULT")), + ("Default Judge", env_vars.get("LLM_DEFAULT_EVAL")), + ] + defaults = [(label, value) for label, value in defaults if value] + for i, (label, value) in enumerate(defaults): + prefix = "\n" if i == 0 else "" + print(f"{prefix} {label.ljust(label_width)}{value}") + + llms_available = env_vars.get("LLMS_AVAILABLE", "") + if llms_available: + llms = [llm.strip() for llm in llms_available.split(",") if llm.strip()] + if llms: + label = "LLMs Available".ljust(label_width) + print(f"\n {label}{llms[0]}") + continuation = " " * (2 + label_width) + for llm in llms[1:]: + 
print(f"{continuation}{llm}") - print(f"The following environment variables will be written to {env_file}:\n") - for key, value in env_vars.items(): - display_value = masked_api_key if key == "MODEL_PROXY_API_KEY" else value - print(f" {key}={display_value}") print() if not no_confirm: - if not self.confirmation(f"write these environment variables to {env_file}", default_to_yes=True): - return + if not self.confirmation(f"write these settings to {os.path.basename(env_file_abs)}", default_to_yes=True): + return False - with open(env_file, "a") as f: + with open(env_file_abs, "a") as f: f.write("\n") for key, value in env_vars.items(): f.write(f"{key}={value}\n") - print(f"Environment variables have been written to {env_file}.") + if not quiet: + print(f"Environment variables have been written to {env_file_abs}.") + return True - def _write_benchmarks_example(self, example_file): + @staticmethod + def _format_expiry(iso_timestamp): + try: + expiry = datetime.fromisoformat(iso_timestamp.rstrip("Z")).replace(tzinfo=timezone.utc) + except ValueError: + return iso_timestamp + total = int((expiry - datetime.now(timezone.utc)).total_seconds()) + if total <= 0: + return "Expired" + if total < 3600: + n = max(1, total // 60) + return f"In {n} minute{'s' if n != 1 else ''}" + if total < 86400: + n = total // 3600 + return f"In {n} hour{'s' if n != 1 else ''}" + n = total // 86400 + return f"In {n} day{'s' if n != 1 else ''}" + + def _write_benchmarks_example(self, example_file, quiet=False): example_file = os.path.abspath(example_file) if os.path.exists(example_file): - print(f"Example file already exists at {example_file}, skipping.") + if not quiet: + print(f"Example file already exists at {example_file}, skipping.") return with open(example_file, "w") as f: f.write(BENCHMARKS_EXAMPLE_TASK) - print(f"Example benchmark task file has been written to {example_file}.") + if not quiet: + print(f"Example benchmark task file has been written to {example_file}.") - def 
_write_benchmarks_reference(self, directory): + def _write_benchmarks_reference(self, directory, quiet=False): ref_file = os.path.join(os.path.abspath(directory), "kaggle_benchmarks_reference.md") if os.path.exists(ref_file): - print(f"Reference file already exists at {ref_file}, skipping.") + if not quiet: + print(f"Reference file already exists at {ref_file}, skipping.") return with open(ref_file, "w") as f: f.write(BENCHMARKS_SYNTAX_REF) - print(f"Syntax reference has been written to {ref_file}.") + if not quiet: + print(f"Syntax reference has been written to {ref_file}.") def benchmarks_auth_cli(self, no_confirm=False, env_file=".env"): env_vars = self._fetch_model_proxy_env() self._write_benchmarks_env(env_vars, no_confirm, env_file) def benchmarks_init_cli(self, no_confirm=False, env_file=".env", example_file="example_task.py"): + print("Initializing Kaggle Benchmarks environment") + print(f" Target: {os.path.abspath(env_file)}\n") env_vars = self._fetch_model_proxy_env() env_vars.update( { @@ -7079,9 +7128,24 @@ def benchmarks_init_cli(self, no_confirm=False, env_file=".env", example_file="e "LLMS_AVAILABLE": "anthropic/claude-haiku-4-5@20251001,deepseek-ai/deepseek-v3.2,google/gemini-3-flash-preview,google/gemini-3.1-flash-lite-preview,openai/gpt-oss-120b,qwen/qwen3-next-80b-a3b-instruct,zai/glm-5", } ) - self._write_benchmarks_env(env_vars, no_confirm, env_file) - self._write_benchmarks_example(example_file) - self._write_benchmarks_reference(os.path.dirname(os.path.abspath(example_file))) + if not self._write_benchmarks_env(env_vars, no_confirm, env_file, quiet=True): + return + self._write_benchmarks_example(example_file, quiet=True) + self._write_benchmarks_reference(os.path.dirname(os.path.abspath(example_file)), quiet=True) + + env_name = os.path.basename(os.path.abspath(env_file)) + example_name = os.path.basename(os.path.abspath(example_file)) + ref_name = "kaggle_benchmarks_reference.md" + col_width = max(len(env_name), len(example_name), 
len(ref_name)) + 3 + + print("\n✅ Environment initialized!") + print("\nFiles created in ./:") + print(f" {env_name.ljust(col_width)}(API keys & configuration)") + print(f" {example_name.ljust(col_width)}(Starter template)") + print(f" {ref_name.ljust(col_width)}(Syntax guide)") + print("\nNext step:") + print(" Run your first task using the example file:") + print(f" $ kaggle b t push what-is-kaggle -f {example_name} --wait") def benchmarks_tasks_push_cli(self, task, file, wait=None, poll_interval=60, verbose=False): if poll_interval is not None and poll_interval <= 0: diff --git a/src/kaggle/test/test_benchmarks_cli.py b/src/kaggle/test/test_benchmarks_cli.py index eb96e37b..5c08d2f8 100644 --- a/src/kaggle/test/test_benchmarks_cli.py +++ b/src/kaggle/test/test_benchmarks_cli.py @@ -1427,7 +1427,7 @@ def test_writes_env_file_with_yes_flag(self, api, mock_token, capsys, tmp_path): assert "MODEL_PROXY_API_KEY=kaggle-benchmarks:cool-token\n" in content assert "MODEL_PROXY_EXPIRY_TIME=2026-04-17T12:00:00Z\n" in content out = capsys.readouterr().out - assert "MODEL_PROXY_API_KEY=****************oken" in out + assert "API Key (ends in ...oken)" in out assert "kaggle-benchmarks:cool-token" not in out assert "have been written to" in out @@ -1437,7 +1437,7 @@ def test_aborted_on_no_confirm(self, api, mock_token, capsys, tmp_path): api.benchmarks_auth_cli(no_confirm=False, env_file=env_file) assert not (tmp_path / ".env").exists() out = capsys.readouterr().out - assert "MODEL_PROXY_URL" in out + assert "The following configuration will be set:" in out assert "have been written to" not in out def test_confirmed_on_yes(self, api, mock_token, capsys, tmp_path): @@ -1487,9 +1487,9 @@ def test_writes_all_vars(self, api, mock_token, capsys, tmp_path): in content ) out = capsys.readouterr().out - assert "MODEL_PROXY_API_KEY=****************oken" in out - assert "LLM_DEFAULT=google/gemini-3-flash-preview" in out - assert "have been written to" in out + assert "API Key (ends in
...oken)" in out + assert "Default LLM google/gemini-3-flash-preview" in out + assert "Environment initialized!" in out def test_writes_example_file(self, api, mock_token, capsys, tmp_path): env_file = str(tmp_path / ".env") @@ -1499,7 +1499,8 @@ def test_writes_example_file(self, api, mock_token, capsys, tmp_path): assert "import kaggle_benchmarks as kbench" in content assert "kaggle_benchmarks_reference.md" in content out = capsys.readouterr().out - assert "Example benchmark task file has been written to" in out + assert "example_task.py" in out + assert "Starter template" in out def test_writes_reference_file(self, api, mock_token, capsys, tmp_path): env_file = str(tmp_path / ".env") @@ -1510,8 +1511,8 @@ def test_writes_reference_file(self, api, mock_token, capsys, tmp_path): content = ref_file.read_text() assert "kaggle-benchmarks Task Syntax Reference" in content out = capsys.readouterr().out - assert "Syntax reference has been written to" in out assert "kaggle_benchmarks_reference.md" in out + assert "Syntax guide" in out def test_skips_reference_file_if_exists(self, api, mock_token, capsys, tmp_path): ref_file = tmp_path / "kaggle_benchmarks_reference.md" @@ -1521,7 +1522,7 @@ def test_skips_reference_file_if_exists(self, api, mock_token, capsys, tmp_path) api.benchmarks_init_cli(no_confirm=True, env_file=env_file, example_file=str(example_file)) assert ref_file.read_text() == "existing content\n" out = capsys.readouterr().out - assert "Reference file already exists" in out + assert "Environment initialized!" 
in out def test_skips_example_file_if_exists(self, api, mock_token, capsys, tmp_path): example_file = tmp_path / "example_task.py" @@ -1530,7 +1531,7 @@ def test_skips_example_file_if_exists(self, api, mock_token, capsys, tmp_path): api.benchmarks_init_cli(no_confirm=True, env_file=env_file, example_file=str(example_file)) assert example_file.read_text() == "existing content\n" out = capsys.readouterr().out - assert "already exists" in out + assert "Environment initialized!" in out def test_custom_example_file(self, api, mock_token, capsys, tmp_path): env_file = str(tmp_path / ".env") @@ -1556,6 +1557,50 @@ def test_appends_to_existing_file(self, api, mock_token, capsys, tmp_path): assert "LLM_DEFAULT=google/gemini-3-flash-preview\n" in content +# ============================================================ +# Expiry Formatting +# ============================================================ + + +class TestFormatExpiry: + """Tests for ``KaggleApi._format_expiry`` static helper.""" + + _NOW = "2026-05-23T12:00:00" # frozen "now" in UTC + + @pytest.fixture(autouse=True) + def _freeze_now(self): + from datetime import datetime as real_datetime, timezone + + frozen = real_datetime.fromisoformat(self._NOW).replace(tzinfo=timezone.utc) + + class FrozenDatetime(real_datetime): + @classmethod + def now(cls, tz=None): + return frozen if tz is None else frozen.astimezone(tz) + + with patch("kaggle.api.kaggle_api_extended.datetime", FrozenDatetime): + yield + + @pytest.mark.parametrize( + "expiry_iso, expected", + [ + ("2026-05-23T11:00:00Z", "Expired"), + ("2026-05-23T12:00:00Z", "Expired"), + ("2026-05-23T12:00:30Z", "In 1 minute"), + ("2026-05-23T12:05:00Z", "In 5 minutes"), + ("2026-05-23T13:00:00Z", "In 1 hour"), + ("2026-05-24T00:00:00Z", "In 12 hours"), + ("2026-05-24T12:00:00Z", "In 1 day"), + ("2026-05-30T12:00:00Z", "In 7 days"), + ], + ) + def test_relative_buckets(self, expiry_iso, expected): + assert KaggleApi._format_expiry(expiry_iso) == expected + + def 
test_unparseable_falls_back_to_raw(self): + assert KaggleApi._format_expiry("not-a-timestamp") == "not-a-timestamp" + + # ============================================================ # Task Name Detection # ============================================================ From 137eb5aa0debe7ad01e6ed0d5c7013298a294b39 Mon Sep 17 00:00:00 2001 From: Nan Liao Date: Sat, 23 May 2026 01:16:10 +0000 Subject: [PATCH 2/2] push command --- src/kaggle/api/kaggle_api_extended.py | 35 +++++++++++++++++--------- src/kaggle/test/test_benchmarks_cli.py | 16 ++++++------ 2 files changed, 32 insertions(+), 19 deletions(-) diff --git a/src/kaggle/api/kaggle_api_extended.py b/src/kaggle/api/kaggle_api_extended.py index 385b681b..7cf5ea7a 100644 --- a/src/kaggle/api/kaggle_api_extended.py +++ b/src/kaggle/api/kaggle_api_extended.py @@ -6952,8 +6952,7 @@ def _adaptive_sleep(current_interval, poll_interval, verbose=False): return min(poll_interval, int(current_interval * 1.5)) def _poll_task_creation(self, kaggle, task, wait, poll_interval, verbose=False): - """Poll task creation status until terminal or timeout.""" - print("Waiting for task to be processed...") + """Poll task creation status until terminal or timeout. 
Returns True on completion, False on timeout.""" start_time = time.time() current_interval = min(self._ADAPTIVE_POLL_START, poll_interval) while True: @@ -6961,8 +6960,7 @@ def _poll_task_creation(self, kaggle, task, wait, poll_interval, verbose=False): state = task_info.creation_state if state == BenchmarkTaskVersionCreationState.BENCHMARK_TASK_VERSION_CREATION_STATE_COMPLETED: - print(f"Task '{task}' creation completed.") - return + return True elif state not in self._PENDING_CREATION_STATES: error_msg = f"Task '{task}' creation failed with status: {self._clean_enum_str(state)}" error = getattr(task_info, "error", None) or getattr(task_info, "creation_error_message", None) @@ -6970,11 +6968,11 @@ def _poll_task_creation(self, kaggle, task, wait, poll_interval, verbose=False): error_msg += f" Error: {error}" raise ValueError(error_msg) - print(f" Task status: {self._clean_enum_str(state)}...") + print(f" Task status: {self._clean_enum_str(state)}...") if wait > 0 and (time.time() - start_time) > wait: print(f"Timed out waiting for task creation after {wait} seconds.") - return + return False current_interval = self._adaptive_sleep(current_interval, poll_interval, verbose) @@ -7172,6 +7170,7 @@ def benchmarks_tasks_push_cli(self, task, file, wait=None, poll_interval=60, ver with self.build_kaggle_client() as kaggle: # If a previous push is still being created, wait or error. task_info = self._get_benchmark_task(task_slug, kaggle, allow_not_found=True) + is_new_version = task_info is not None if task_info and task_info.creation_state in self._PENDING_CREATION_STATES: if wait is None: raise ValueError( @@ -7180,7 +7179,6 @@ def benchmarks_tasks_push_cli(self, task, file, wait=None, poll_interval=60, ver ) print(f"Task '{task_slug}' is already being created. 
Waiting for it to finish...") self._poll_task_creation(kaggle, task_slug, wait, poll_interval, verbose=verbose) - print(f"Pushing new version of '{task_slug}'...") request = ApiCreateBenchmarkTaskRequest() request.slug = task_slug @@ -7192,14 +7190,27 @@ def benchmarks_tasks_push_cli(self, task, file, wait=None, poll_interval=60, ver raise ValueError(f"Failed to push task: {error}") url = self._full_task_url(response.url) - print(f"Task '{task_slug}' pushed.") - print(f"\033[1mTask URL: {url}\033[0m") - print(f"To run this task against models, use: kaggle b t run {task_slug}") + model_output_url = re.sub(r"/\d+/?$", "", url) + "?compare=true" + banner_subject = f"new version of {task_slug}" if is_new_version else task_slug + print(f"\n🚀 Pushed {banner_subject}") + print(f" Task Details: {url}") if wait is None: - print(f"To check creation status, use: kaggle b t status {task_slug}") + print(f" Model Output: {model_output_url}") + print("\nNext steps:") + print(" Check creation status:") + print(f" kaggle b t status {task_slug}\n") + print(" Select models to run (or use --models to skip the menu):") + print(f" kaggle b t run {task_slug}") else: - self._poll_task_creation(kaggle, task_slug, wait, poll_interval, verbose=verbose) + print("\n⏳ Status") + completed = self._poll_task_creation(kaggle, task_slug, wait, poll_interval, verbose=verbose) + if completed: + print("\n✅ Completed") + print(f" Model Output: {model_output_url}") + print("\nNext step:") + print(" Select models to run (or use --models to skip the menu):") + print(f" kaggle b t run {task_slug}") def benchmarks_tasks_run_cli(self, task, model=None, wait=None, poll_interval=60, verbose=False): if poll_interval is not None and poll_interval <= 0: diff --git a/src/kaggle/test/test_benchmarks_cli.py b/src/kaggle/test/test_benchmarks_cli.py index 5c08d2f8..277291c0 100644 --- a/src/kaggle/test/test_benchmarks_cli.py +++ b/src/kaggle/test/test_benchmarks_cli.py @@ -242,6 +242,7 @@ def
test_push_rejects_invalid_input(self, api, tmp_path, task, filename, content def test_push_creates_task(self, api, tmp_path, capsys, content, task_name, expected_slug): """Push converts .py -> ipynb via jupytext and creates the task.""" filepath = _write_task_file(tmp_path, content) + api._mock_benchmarks.get_benchmark_task.side_effect = HTTPError(response=MagicMock(status_code=404)) _setup_create_response(api, task_name) jt = _push(api, task_name, filepath) @@ -255,8 +256,9 @@ def test_push_creates_task(self, api, tmp_path, capsys, content, task_name, expe captured = capsys.readouterr() output = captured.out - assert f"Task '{expected_slug}' pushed." in output - assert "Task URL:" in output + assert f"🚀 Pushed {expected_slug}" in output + assert "Task Details:" in output + assert "Model Output:" in output assert f"kaggle b t run {expected_slug}" in output # When the original name differs from the slug, a normalization warning is printed to stderr. if task_name != expected_slug: @@ -269,7 +271,7 @@ def test_push_creates_new_task_without_prompting(self, api, tmp_path, capsys, st api._mock_benchmarks.get_benchmark_task.side_effect = HTTPError(response=MagicMock(status_code=status_code)) _setup_create_response(api) _push(api, "my-task", filepath) - assert "Task 'my-task' pushed." in capsys.readouterr().out + assert "🚀 Pushed my-task" in capsys.readouterr().out def test_push_prefixes_relative_url(self, api, tmp_path, capsys): """If url starts with '/', prefix https://www.kaggle.com.""" @@ -313,8 +315,7 @@ def test_push_wait_monitors_pending_then_pushes(self, api, capsys, tmp_path, sta output = capsys.readouterr().out assert "already being created" in output - assert "Pushing new version of 'my-task'" in output - assert "Task 'my-task' pushed."
in output + assert "🚀 Pushed new version of my-task" in output # Verify the create API was still called (new version pushed) api._mock_benchmarks.create_benchmark_task.assert_called_once() @@ -351,8 +352,9 @@ def test_push_wait_polls_until_completion(self, api, capsys, tmp_path): api.benchmarks_tasks_push_cli("my-task", filepath, wait=0) output = capsys.readouterr().out - assert "Waiting for task to be processed" in output - assert "Task 'my-task' creation completed." in output + assert "⏳ Status" in output + assert "✅ Completed" in output + assert "Model Output:" in output def test_push_adaptive_polling(self, api, tmp_path): filepath = _write_task_file(tmp_path)