Skip to content

Commit 9b1be75

Browse files
authored
Merge branch 'main' into fix/test_tasks/test_supervised_task
2 parents 9d4f27f + 25ba6f8 commit 9b1be75

17 files changed

Lines changed: 219 additions & 139 deletions

File tree

.github/workflows/test.yml

Lines changed: 74 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -34,15 +34,27 @@ jobs:
3434
sklearn-only: ["true"]
3535

3636
exclude:
37-
# incompatible version combinations
37+
# (python, sklearn) combinations for which there is no PyPI release
38+
# scikit-learn 1.3
3839
- python-version: "3.13"
3940
scikit-learn: "1.3.*"
40-
- python-version: "3.13"
41-
scikit-learn: "1.4.*"
4241
- python-version: "3.14"
4342
scikit-learn: "1.3.*"
43+
# scikit-learn 1.4
44+
- python-version: "3.13"
45+
scikit-learn: "1.4.*"
4446
- python-version: "3.14"
4547
scikit-learn: "1.4.*"
48+
# scikit-learn 1.5
49+
- python-version: "3.14"
50+
scikit-learn: "1.5.*"
51+
# scikit-learn 1.6
52+
- python-version: "3.14"
53+
scikit-learn: "1.6.*"
54+
# scikit-learn 1.7 is installed with pandas 3, which has no release for Python 3.10
55+
- python-version: "3.10"
56+
scikit-learn: "1.7.*"
57+
4658

4759
include:
4860
# Full test run on ubuntu, 3.14
@@ -64,14 +76,6 @@ jobs:
6476
sklearn-only: "false"
6577
code-cov: true
6678

67-
# Pandas 2 run
68-
- os: ubuntu-latest
69-
python-version: "3.12"
70-
scikit-learn: "1.5.*"
71-
sklearn-only: "false"
72-
pandas-version: "2.*"
73-
code-cov: false
74-
7579
steps:
7680
- uses: actions/checkout@v6
7781
with:
@@ -82,15 +86,21 @@ jobs:
8286
with:
8387
python-version: ${{ matrix.python-version }}
8488

85-
- name: Install test dependencies, scikit-learn, and optional pandas
89+
- name: Install test dependencies, scikit-learn, and pandas
8690
shell: bash
8791
run: |
8892
python -m pip install --upgrade pip
8993
pip install -e .[test] scikit-learn==${{ matrix.scikit-learn }}
90-
91-
if [ "${{ matrix.pandas-version }}" != "" ]; then
92-
echo "Installing specific pandas version: ${{ matrix.pandas-version }}"
93-
pip install "pandas==${{ matrix.pandas-version }}"
94+
95+
# CI pairs scikit-learn 1.7+ with pandas 3.x; earlier scikit-learn versions are tested with pandas 2.x
96+
version="${{ matrix.scikit-learn }}"
97+
major=$(echo "$version" | cut -d. -f1)
98+
minor=$(echo "$version" | cut -d. -f2)
99+
100+
if [[ "$major" -gt 1 ]] || { [[ "$major" -eq 1 ]] && [[ "$minor" -ge 7 ]]; }; then
101+
pip install "pandas==3.*"
102+
else
103+
pip install "pandas==2.*"
94104
fi
95105
96106
- name: Store repository status
@@ -101,22 +111,46 @@ jobs:
101111
echo "BEFORE=$git_status" >> $GITHUB_ENV
102112
echo "Repository status before tests: $git_status"
103113
114+
- name: Clone Services
115+
if: matrix.os == 'ubuntu-latest'
116+
id: clone-services
117+
run: |
118+
git clone --depth 1 https://github.com/openml/services.git
119+
120+
- name: Start Docker Services
121+
id: start-services
122+
if: matrix.os == 'ubuntu-latest'
123+
working-directory: ./services
124+
run: |
125+
chmod -R a+rw ./data
126+
chmod -R a+rw ./logs
127+
docker compose --profile rest-api --profile minio --profile evaluation-engine up -d
128+
129+
echo "Waiting for PHP API to boot..."
130+
timeout 60s bash -c 'until [ "$(docker inspect -f {{.State.Health.Status}} openml-php-rest-api)" == "healthy" ]; do sleep 5; done'
131+
132+
echo "Final Verification: Gateway Connectivity..."
133+
curl -sSfL http://localhost:8000/api/v1/xml/data/1 | head -n 15
134+
135+
docker container ls
136+
104137
- name: Show installed dependencies
105138
run: python -m pip list
106139

107140
- name: Run tests on Ubuntu Test
108141
if: matrix.os == 'ubuntu-latest'
109142
env:
110143
OPENML_TEST_SERVER_ADMIN_KEY: ${{ secrets.OPENML_TEST_SERVER_ADMIN_KEY }}
144+
OPENML_USE_LOCAL_SERVICES: "true"
111145
run: |
112146
if [ "${{ matrix.code-cov }}" = "true" ]; then
113147
codecov="--cov=openml --long --cov-report=xml"
114148
fi
115149
116150
if [ "${{ matrix.sklearn-only }}" = "true" ]; then
117-
marks="sklearn and not production_server and not test_server"
151+
marks="sklearn and not production_server"
118152
else
119-
marks="not production_server and not test_server"
153+
marks="not production_server"
120154
fi
121155
122156
pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
@@ -125,15 +159,16 @@ jobs:
125159
if: matrix.os == 'ubuntu-latest'
126160
env:
127161
OPENML_TEST_SERVER_ADMIN_KEY: ${{ secrets.OPENML_TEST_SERVER_ADMIN_KEY }}
162+
OPENML_USE_LOCAL_SERVICES: "true"
128163
run: |
129164
if [ "${{ matrix.code-cov }}" = "true" ]; then
130165
codecov="--cov=openml --long --cov-report=xml"
131166
fi
132167
133168
if [ "${{ matrix.sklearn-only }}" = "true" ]; then
134-
marks="sklearn and production_server and not test_server"
169+
marks="sklearn and production_server"
135170
else
136-
marks="production_server and not test_server"
171+
marks="production_server"
137172
fi
138173
139174
pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
@@ -145,6 +180,25 @@ jobs:
145180
run: | # we need a separate step because of the bash-specific if-statement in the previous one.
146181
pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not test_server"
147182
183+
- name: Upload coverage
184+
if: matrix.code-cov && always()
185+
uses: codecov/codecov-action@v4
186+
with:
187+
files: coverage.xml
188+
token: ${{ secrets.CODECOV_TOKEN }}
189+
fail_ci_if_error: true
190+
verbose: true
191+
192+
- name: Dump server logs
193+
if: always() && steps.start-services.outcome == 'success'
194+
run: |
195+
docker logs openml-php-rest-api -t
196+
197+
- name: Cleanup Docker setup
198+
if: always() && steps.clone-services.outcome == 'success'
199+
run: |
200+
sudo rm -rf services
201+
148202
- name: Check for files left behind by test
149203
if: matrix.os != 'windows-latest' && always()
150204
run: |
@@ -157,15 +211,6 @@ jobs:
157211
exit 1
158212
fi
159213
160-
- name: Upload coverage
161-
if: matrix.code-cov && always()
162-
uses: codecov/codecov-action@v4
163-
with:
164-
files: coverage.xml
165-
token: ${{ secrets.CODECOV_TOKEN }}
166-
fail_ci_if_error: true
167-
verbose: true
168-
169214
dummy_windows_py_sk024:
170215
name: (windows-latest, Py, sk0.24.*, sk-only:false)
171216
runs-on: ubuntu-latest

openml/runs/functions.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,8 @@ def initialize_model_from_run(run_id: int, *, strict_version: bool = True) -> An
376376
run = get_run(run_id)
377377
# TODO(eddiebergman): I imagine this is None if it's not published,
378378
# might need to raise an explicit error for that
379-
assert run.setup_id is not None
379+
if run.setup_id is None:
380+
raise ValueError(f"Run {run_id} has no associated setup_id. Cannot initialize model.")
380381
return initialize_model(setup_id=run.setup_id, strict_version=strict_version)
381382

382383

@@ -416,7 +417,8 @@ def initialize_model_from_trace(
416417
run = get_run(run_id)
417418
# TODO(eddiebergman): I imagine this is None if it's not published,
418419
# might need to raise an explicit error for that
419-
assert run.flow_id is not None
420+
if run.flow_id is None:
421+
raise ValueError(f"Run {run_id} has no associated flow_id. Cannot initialize model.")
420422

421423
flow = get_flow(run.flow_id)
422424
run_trace = get_run_trace(run_id)
@@ -576,8 +578,10 @@ def _calculate_local_measure( # type: ignore
576578
_user_defined_measures_fold[openml_name] = sklearn_fn(_test_y, _pred_y)
577579

578580
if isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)):
579-
assert test_y is not None
580-
assert proba_y is not None
581+
if test_y is None:
582+
raise ValueError("test_y cannot be None for classification tasks.")
583+
if proba_y is None:
584+
raise ValueError("proba_y cannot be None for classification tasks.")
581585

582586
for i, tst_idx in enumerate(test_indices):
583587
if task.class_labels is not None:
@@ -622,7 +626,8 @@ def _calculate_local_measure( # type: ignore
622626
)
623627

624628
elif isinstance(task, OpenMLRegressionTask):
625-
assert test_y is not None
629+
if test_y is None:
630+
raise ValueError("test_y cannot be None for regression tasks.")
626631
for i, _ in enumerate(test_indices):
627632
truth = test_y.iloc[i] if isinstance(test_y, pd.Series) else test_y[i]
628633
arff_line = format_prediction(
@@ -743,7 +748,8 @@ def _run_task_get_arffcontent_parallel_helper( # noqa: PLR0913
743748

744749
if isinstance(task, OpenMLSupervisedTask):
745750
x, y = task.get_X_and_y()
746-
assert isinstance(y, (pd.Series, pd.DataFrame))
751+
if not isinstance(y, (pd.Series, pd.DataFrame)):
752+
raise TypeError(f"y must be a pandas Series or DataFrame, got {type(y).__name__}")
747753
train_x = x.iloc[train_indices]
748754
train_y = y.iloc[train_indices]
749755
test_x = x.iloc[test_indices]
@@ -1213,7 +1219,11 @@ def __list_runs(api_call: str) -> pd.DataFrame:
12131219
f'"http://openml.org/openml": {runs_dict}',
12141220
)
12151221

1216-
assert isinstance(runs_dict["oml:runs"]["oml:run"], list), type(runs_dict["oml:runs"])
1222+
if not isinstance(runs_dict["oml:runs"]["oml:run"], list):
1223+
raise TypeError(
1224+
f"Expected runs_dict['oml:runs']['oml:run'] to be a list, "
1225+
f"got {type(runs_dict['oml:runs']['oml:run']).__name__}"
1226+
)
12171227

12181228
runs = {
12191229
int(r["oml:run_id"]): {

0 commit comments

Comments
 (0)