diff --git a/doc/index.rst b/doc/index.rst index b8856e83b..da48194eb 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -30,7 +30,7 @@ Example ('estimator', tree.DecisionTreeClassifier()) ] ) - # Download the OpenML task for the german credit card dataset with 10-fold + # Download the OpenML task for the pendigits dataset with 10-fold # cross-validation. task = openml.tasks.get_task(32) # Run the scikit-learn model on the task. diff --git a/doc/progress.rst b/doc/progress.rst index 6b58213e5..d546ac4bd 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -9,6 +9,7 @@ Changelog 0.13.1 ~~~~~~ + * DOC #1241 #1229 #1231: Minor documentation fixes and resolve documentation examples not working. * ADD #1028: Add functions to delete runs, flows, datasets, and tasks (e.g., ``openml.datasets.delete_dataset``). * ADD #1144: Add locally computed results to the ``OpenMLRun`` object's representation if the run was created locally and not downloaded from the server. * ADD #1180: Improve the error message when the checksum of a downloaded dataset does not match the checksum provided by the API. diff --git a/examples/30_extended/configure_logging.py b/examples/30_extended/configure_logging.py index 2dae4047f..3d33f1546 100644 --- a/examples/30_extended/configure_logging.py +++ b/examples/30_extended/configure_logging.py @@ -37,8 +37,8 @@ import logging -openml.config.console_log.setLevel(logging.DEBUG) -openml.config.file_log.setLevel(logging.WARNING) +openml.config.set_console_log_level(logging.DEBUG) +openml.config.set_file_log_level(logging.WARNING) openml.datasets.get_dataset("iris") # Now the log level that was previously written to file should also be shown in the console. diff --git a/examples/30_extended/custom_flow_.py b/examples/30_extended/custom_flow_.py index 513d445ba..241f3e6eb 100644 --- a/examples/30_extended/custom_flow_.py +++ b/examples/30_extended/custom_flow_.py @@ -77,6 +77,8 @@ # you can use the Random Forest Classifier flow as a *subflow*. 
It allows for # all hyperparameters of the Random Classifier Flow to also be specified in your pipeline flow. # +# Note: you can currently only specify one subflow as part of the components. +# # In this example, the auto-sklearn flow is a subflow: the auto-sklearn flow is entirely executed as part of this flow. # This allows people to specify auto-sklearn hyperparameters used in this flow. # In general, using a subflow is not required. @@ -87,6 +89,8 @@ autosklearn_flow = openml.flows.get_flow(9313) # auto-sklearn 0.5.1 subflow = dict( components=OrderedDict(automl_tool=autosklearn_flow), + # If you do not want to reference a subflow, you can use the following: + # components=OrderedDict(), ) #################################################################################################### @@ -124,7 +128,7 @@ OrderedDict([("oml:name", "time"), ("oml:value", 120), ("oml:component", flow_id)]), ] -task_id = 1965 # Iris Task +task_id = 1200 # Iris Task task = openml.tasks.get_task(task_id) dataset_id = task.get_dataset().dataset_id diff --git a/openml/utils.py b/openml/utils.py index 3c2fa876f..19f77f8c6 100644 --- a/openml/utils.py +++ b/openml/utils.py @@ -283,7 +283,7 @@ def _list_all(listing_call, output_format="dict", *args, **filters): if len(result) == 0: result = new_batch else: - result = result.append(new_batch, ignore_index=True) + result = pd.concat([result, new_batch], ignore_index=True) else: # For output_format = 'dict' or 'object' result.update(new_batch) diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py index a5add31c8..8558d27c8 100644 --- a/tests/test_utils/test_utils.py +++ b/tests/test_utils/test_utils.py @@ -18,6 +18,23 @@ def mocked_perform_api_call(call, request_method): def test_list_all(self): openml.utils._list_all(listing_call=openml.tasks.functions._list_tasks) + openml.utils._list_all( + listing_call=openml.tasks.functions._list_tasks, output_format="dataframe" + ) + + def 
test_list_all_with_multiple_batches(self): + res = openml.utils._list_all( + listing_call=openml.tasks.functions._list_tasks, output_format="dict", batch_size=2000 + ) + # Verify that test server state is still valid for this test to work as intended + # -> If the number of results is less than 2000, the test cannot test the + # batching operation. + assert len(res) > 2000 + openml.utils._list_all( + listing_call=openml.tasks.functions._list_tasks, + output_format="dataframe", + batch_size=2000, + ) @unittest.mock.patch("openml._api_calls._perform_api_call", side_effect=mocked_perform_api_call) def test_list_all_few_results_available(self, _perform_api_call):