openml
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
Lines changed: 37 additions & 13 deletions
diff --git a/openml/runs/functions.py b/openml/runs/functions.py
Lines changed: 17 additions & 7 deletions
diff --git a/openml/runs/run.py b/openml/runs/run.py
Lines changed: 58 additions & 70 deletions
diff --git a/openml/runs/trace.py b/openml/runs/trace.py
Lines changed: 12 additions & 3 deletions
diff --git a/tests/conftest.py b/tests/conftest.py
Lines changed: 2 additions & 0 deletions
@@ -101,22 +101,40 @@ jobs:
         echo "BEFORE=$git_status" >> $GITHUB_ENV
         echo "Repository status before tests: $git_status"
 
+    - name: Clone Services
+      if: matrix.os == 'ubuntu-latest'
+      run: |
+        git clone --depth 1 https://github.com/openml/services.git
+
+    - name: Start Docker Services
+      if: matrix.os == 'ubuntu-latest'
+      working-directory: ./services
+      run: |
+        docker compose --profile rest-api --profile minio up -d
+        echo "Waiting for PHP API to boot..."
+        timeout 60s bash -c 'until [ "$(docker inspect -f {{.State.Health.Status}} openml-php-rest-api)" == "healthy" ]; do sleep 5; done'
+        echo "Final Verification: Gateway Connectivity..."
+        # Download first: in `curl | head` a failing curl is masked unless pipefail is set.
+        curl -sSfL -o "$RUNNER_TEMP/gateway-check.xml" http://localhost:8000/api/v1/xml/data/1
+        head -n 15 "$RUNNER_TEMP/gateway-check.xml"
+
     - name: Show installed dependencies
       run: python -m pip list
 
     - name: Run tests on Ubuntu Test
       if: matrix.os == 'ubuntu-latest'
       env:
         OPENML_TEST_SERVER_ADMIN_KEY: ${{ secrets.OPENML_TEST_SERVER_ADMIN_KEY }}
+        OPENML_USE_LOCAL_SERVICES: "true"
       run: |
         if [ "${{ matrix.code-cov }}" = "true" ]; then
           codecov="--cov=openml --long --cov-report=xml"
         fi
 
         if [ "${{ matrix.sklearn-only }}" = "true" ]; then
-          marks="sklearn and not production_server and not test_server"
+          marks="sklearn and not production_server"
         else
-          marks="not production_server and not test_server"
+          marks="not production_server"
         fi
 
         pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
@@ -125,15 +143,16 @@ jobs:
       if: matrix.os == 'ubuntu-latest'
       env:
         OPENML_TEST_SERVER_ADMIN_KEY: ${{ secrets.OPENML_TEST_SERVER_ADMIN_KEY }}
+        OPENML_USE_LOCAL_SERVICES: "true"
       run: |
         if [ "${{ matrix.code-cov }}" = "true" ]; then
           codecov="--cov=openml --long --cov-report=xml"
         fi
 
         if [ "${{ matrix.sklearn-only }}" = "true" ]; then
-          marks="sklearn and production_server and not test_server"
+          marks="sklearn and production_server"
         else
-          marks="production_server and not test_server"
+          marks="production_server"
         fi
 
         pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
@@ -145,6 +164,20 @@ jobs:
       run: |  # we need a separate step because of the bash-specific if-statement in the previous one.
         pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not test_server"
 
+    - name: Upload coverage
+      if: matrix.code-cov && always()
+      uses: codecov/codecov-action@v4
+      with:
+        files: coverage.xml
+        token: ${{ secrets.CODECOV_TOKEN }}
+        fail_ci_if_error: true
+        verbose: true
+
+    - name: Cleanup Docker setup
+      if: matrix.os == 'ubuntu-latest' && always()
+      run: |
+        sudo rm -rf services
+
     - name: Check for files left behind by test
       if: matrix.os != 'windows-latest' && always()
       run: |
@@ -157,15 +190,6 @@ jobs:
             exit 1
         fi
 
-    - name: Upload coverage
-      if: matrix.code-cov && always()
-      uses: codecov/codecov-action@v4
-      with:
-        files: coverage.xml
-        token: ${{ secrets.CODECOV_TOKEN }}
-        fail_ci_if_error: true
-        verbose: true
-
   dummy_windows_py_sk024:
     name: (windows-latest, Py, sk0.24.*, sk-only:false)
     runs-on: ubuntu-latest
 
@@ -375,7 +375,8 @@ def initialize_model_from_run(run_id: int, *, strict_version: bool = True) -> An
     run = get_run(run_id)
     # TODO(eddiebergman): I imagine this is None if it's not published,
     # might need to raise an explicit error for that
-    assert run.setup_id is not None
+    if run.setup_id is None:
+        raise ValueError(f"Run {run_id} has no associated setup_id. Cannot initialize model.")
     return initialize_model(setup_id=run.setup_id, strict_version=strict_version)
 
 
@@ -415,7 +416,8 @@ def initialize_model_from_trace(
     run = get_run(run_id)
     # TODO(eddiebergman): I imagine this is None if it's not published,
     # might need to raise an explicit error for that
-    assert run.flow_id is not None
+    if run.flow_id is None:
+        raise ValueError(f"Run {run_id} has no associated flow_id. Cannot initialize model.")
 
     flow = get_flow(run.flow_id)
     run_trace = get_run_trace(run_id)
@@ -575,8 +577,10 @@ def _calculate_local_measure(  # type: ignore
             _user_defined_measures_fold[openml_name] = sklearn_fn(_test_y, _pred_y)
 
         if isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)):
-            assert test_y is not None
-            assert proba_y is not None
+            if test_y is None:
+                raise ValueError("test_y cannot be None for classification tasks.")
+            if proba_y is None:
+                raise ValueError("proba_y cannot be None for classification tasks.")
 
             for i, tst_idx in enumerate(test_indices):
                 if task.class_labels is not None:
@@ -621,7 +625,8 @@ def _calculate_local_measure(  # type: ignore
                 )
 
         elif isinstance(task, OpenMLRegressionTask):
-            assert test_y is not None
+            if test_y is None:
+                raise ValueError("test_y cannot be None for regression tasks.")
             for i, _ in enumerate(test_indices):
                 truth = test_y.iloc[i] if isinstance(test_y, pd.Series) else test_y[i]
                 arff_line = format_prediction(
@@ -742,7 +747,8 @@ def _run_task_get_arffcontent_parallel_helper(  # noqa: PLR0913
 
     if isinstance(task, OpenMLSupervisedTask):
         x, y = task.get_X_and_y()
-        assert isinstance(y, (pd.Series, pd.DataFrame))
+        if not isinstance(y, (pd.Series, pd.DataFrame)):
+            raise TypeError(f"y must be a pandas Series or DataFrame, got {type(y).__name__}")
         train_x = x.iloc[train_indices]
         train_y = y.iloc[train_indices]
         test_x = x.iloc[test_indices]
@@ -1212,7 +1218,11 @@ def __list_runs(api_call: str) -> pd.DataFrame:
             f'"http://openml.org/openml": {runs_dict}',
         )
 
-    assert isinstance(runs_dict["oml:runs"]["oml:run"], list), type(runs_dict["oml:runs"])
+    if not isinstance(runs_dict["oml:runs"]["oml:run"], list):
+        raise TypeError(
+            f"Expected runs_dict['oml:runs']['oml:run'] to be a list, "
+            f"got {type(runs_dict['oml:runs']['oml:run']).__name__}"
+        )
 
     runs = {
         int(r["oml:run_id"]): {
 
@@ -389,6 +389,57 @@ def to_filesystem(
         if self.trace is not None:
             self.trace._to_filesystem(directory)
 
+    def _get_arff_attributes_for_task(self, task: OpenMLTask) -> list[tuple[str, Any]]:
+        """Build the ARFF attribute list for the predictions of a task.
+
+        Parameters
+        ----------
+        task : OpenMLTask
+            The task for which to generate attributes.
+
+        Returns
+        -------
+        list[tuple[str, Any]]
+            ``(name, type)`` tuples; the type is ``"NUMERIC"`` or the class labels.
+
+        Raises
+        ------
+        ValueError
+            If a classification or learning curve task has no class labels.
+        NotImplementedError
+            If the task is none of the supported task types.
+        """
+        fold_columns: list[tuple[str, Any]] = [("repeat", "NUMERIC"), ("fold", "NUMERIC")]
+        if isinstance(task, (OpenMLLearningCurveTask, OpenMLClassificationTask)):
+            class_labels = task.class_labels
+            if class_labels is None:
+                raise ValueError("The task has no class labels")
+            return [
+                *fold_columns,
+                ("sample", "NUMERIC"),  # legacy column for plain classification tasks
+                ("row_id", "NUMERIC"),
+                ("prediction", class_labels),
+                ("correct", class_labels),
+                *(("confidence." + label, "NUMERIC") for label in class_labels),
+            ]
+
+        row_columns = [*fold_columns, ("row_id", "NUMERIC")]
+        if isinstance(task, OpenMLRegressionTask):
+            return [*row_columns, ("prediction", "NUMERIC"), ("truth", "NUMERIC")]
+        if isinstance(task, OpenMLClusteringTask):
+            return [*row_columns, ("cluster", "NUMERIC")]
+
+        supported_task_types = [
+            TaskType.SUPERVISED_CLASSIFICATION,
+            TaskType.SUPERVISED_REGRESSION,
+            TaskType.CLUSTERING,
+            TaskType.LEARNING_CURVE,
+        ]
+        raise NotImplementedError(
+            f"Task type {task.task_type!s} for task_id {getattr(task, 'task_id', None)!s} "
+            f"is not yet supported. Supported task types are: {supported_task_types!r}"
+        )
+
     def _generate_arff_dict(self) -> OrderedDict[str, Any]:
         """Generates the arff dictionary for uploading predictions to the
         server.
@@ -406,7 +457,8 @@ def _generate_arff_dict(self) -> OrderedDict[str, Any]:
         if self.data_content is None:
             raise ValueError("Run has not been executed.")
         if self.flow is None:
-            assert self.flow_id is not None, "Run has no associated flow id!"
+            if self.flow_id is None:
+                raise ValueError("Run has no associated flow id!")
             self.flow = get_flow(self.flow_id)
 
         if self.description_text is None:
@@ -417,74 +469,7 @@ def _generate_arff_dict(self) -> OrderedDict[str, Any]:
         arff_dict["data"] = self.data_content
         arff_dict["description"] = self.description_text
         arff_dict["relation"] = f"openml_task_{task.task_id}_predictions"
-
-        if isinstance(task, OpenMLLearningCurveTask):
-            class_labels = task.class_labels
-            instance_specifications = [
-                ("repeat", "NUMERIC"),
-                ("fold", "NUMERIC"),
-                ("sample", "NUMERIC"),
-                ("row_id", "NUMERIC"),
-            ]
-
-            arff_dict["attributes"] = instance_specifications
-            if class_labels is not None:
-                arff_dict["attributes"] = (
-                    arff_dict["attributes"]
-                    + [("prediction", class_labels), ("correct", class_labels)]
-                    + [
-                        ("confidence." + class_labels[i], "NUMERIC")
-                        for i in range(len(class_labels))
-                    ]
-                )
-            else:
-                raise ValueError("The task has no class labels")
-
-        elif isinstance(task, OpenMLClassificationTask):
-            class_labels = task.class_labels
-            instance_specifications = [
-                ("repeat", "NUMERIC"),
-                ("fold", "NUMERIC"),
-                ("sample", "NUMERIC"),  # Legacy
-                ("row_id", "NUMERIC"),
-            ]
-
-            arff_dict["attributes"] = instance_specifications
-            if class_labels is not None:
-                prediction_confidences = [
-                    ("confidence." + class_labels[i], "NUMERIC") for i in range(len(class_labels))
-                ]
-                prediction_and_true = [("prediction", class_labels), ("correct", class_labels)]
-                arff_dict["attributes"] = (
-                    arff_dict["attributes"] + prediction_and_true + prediction_confidences
-                )
-            else:
-                raise ValueError("The task has no class labels")
-
-        elif isinstance(task, OpenMLRegressionTask):
-            arff_dict["attributes"] = [
-                ("repeat", "NUMERIC"),
-                ("fold", "NUMERIC"),
-                ("row_id", "NUMERIC"),
-                ("prediction", "NUMERIC"),
-                ("truth", "NUMERIC"),
-            ]
-
-        elif isinstance(task, OpenMLClusteringTask):
-            arff_dict["attributes"] = [
-                ("repeat", "NUMERIC"),
-                ("fold", "NUMERIC"),
-                ("row_id", "NUMERIC"),
-                ("cluster", "NUMERIC"),
-            ]
-
-        else:
-            raise NotImplementedError(
-                f"Task type '{task.task_type}' is not yet supported. "
-                f"Supported task types: Classification, Regression, Clustering, Learning Curve. "
-                f"Task ID: {task.task_id}. "
-                f"Please check the OpenML documentation for supported task types."
-            )
+        arff_dict["attributes"] = self._get_arff_attributes_for_task(task)
 
         return arff_dict
 
@@ -641,7 +626,10 @@ def _get_file_elements(self) -> dict:
 
         if self.parameter_settings is None:
             if self.flow is None:
-                assert self.flow_id is not None  # for mypy
+                if self.flow_id is None:
+                    raise ValueError(
+                        "Run has no associated flow_id and cannot obtain parameter values."
+                    )
                 self.flow = openml.flows.get_flow(self.flow_id)
             self.parameter_settings = self.flow.extension.obtain_parameter_values(
                 self.flow,
 
@@ -94,7 +94,8 @@ def get_parameters(self) -> dict[str, Any]:
                 for param, value in self.setup_string.items()
             }
 
-        assert self.parameters is not None
+        if self.parameters is None:
+            raise ValueError("Parameters must be set before calling get_parameters().")
         return {param[len(PREFIX) :]: value for param, value in self.parameters.items()}
 
 
@@ -490,13 +491,21 @@ def merge_traces(cls, traces: list[OpenMLRunTrace]) -> OpenMLRunTrace:
             for iteration in trace:
                 key = (iteration.repeat, iteration.fold, iteration.iteration)
 
-                assert iteration.parameters is not None
+                if iteration.parameters is None:
+                    raise ValueError(
+                        f"Iteration parameters cannot be None for repeat {iteration.repeat}, "
+                        f"fold {iteration.fold}, iteration {iteration.iteration}"
+                    )
                 param_keys = iteration.parameters.keys()
 
                 if previous_iteration is not None:
                     trace_itr = merged_trace[previous_iteration]
 
-                    assert trace_itr.parameters is not None
+                    if trace_itr.parameters is None:
+                        raise ValueError(
+                            f"Trace iteration parameters cannot be None "
+                            f"for iteration {previous_iteration}"
+                        )
                     trace_itr_keys = trace_itr.parameters.keys()
 
                     if list(param_keys) != list(trace_itr_keys):
 
@@ -289,6 +289,8 @@ def as_robot() -> Iterator[None]:
 @pytest.fixture(autouse=True)
 def with_server(request):
     openml.config.set_api_version(APIVersion.V1)
+    if os.getenv("OPENML_USE_LOCAL_SERVICES") == "true":
+        openml.config.TEST_SERVER_URL = "http://localhost:8000"
     if "production_server" in request.keywords:
         openml.config.set_servers("production")
         yield