mirror of
https://github.com/microsoft/autogen.git
synced 2025-09-25 08:05:36 +00:00
Merge branch 'main' into support_percentages
This commit is contained in:
commit
60a3e85b98
@ -2604,11 +2604,12 @@ class AutoML(BaseEstimator):
|
||||
min_sample_size = min_sample_size or self._settings.get("min_sample_size")
|
||||
use_ray = self._settings.get("use_ray") if use_ray is None else use_ray
|
||||
use_spark = self._settings.get("use_spark") if use_spark is None else use_spark
|
||||
spark_available, spark_error_msg = check_spark()
|
||||
if use_spark and use_ray is not False:
|
||||
raise ValueError("use_spark and use_ray cannot be both True.")
|
||||
elif use_spark and not spark_available:
|
||||
raise spark_error_msg
|
||||
elif use_spark:
|
||||
spark_available, spark_error_msg = check_spark()
|
||||
if not spark_available:
|
||||
raise spark_error_msg
|
||||
|
||||
old_level = logger.getEffectiveLevel()
|
||||
self.verbose = verbose
|
||||
@ -2626,18 +2627,20 @@ class AutoML(BaseEstimator):
|
||||
"Ray installed, setting use_ray to True. If you want to use Spark, set use_spark to True."
|
||||
)
|
||||
use_ray = True
|
||||
elif spark_available:
|
||||
logger.warning(
|
||||
"n_concurrent_trials > 1 is only supported when using Ray or Spark. "
|
||||
"Spark installed, setting use_spark to True. If you want to use Ray, set use_ray to True."
|
||||
)
|
||||
use_spark = True
|
||||
else:
|
||||
logger.warning(
|
||||
"n_concurrent_trials > 1 is only supported when using Ray or Spark. "
|
||||
"Neither Ray nor Spark installed, setting n_concurrent_trials to 1."
|
||||
)
|
||||
n_concurrent_trials = 1
|
||||
spark_available, _ = check_spark()
|
||||
if spark_available:
|
||||
logger.warning(
|
||||
"n_concurrent_trials > 1 is only supported when using Ray or Spark. "
|
||||
"Spark installed, setting use_spark to True. If you want to use Ray, set use_ray to True."
|
||||
)
|
||||
use_spark = True
|
||||
else:
|
||||
logger.warning(
|
||||
"n_concurrent_trials > 1 is only supported when using Ray or Spark. "
|
||||
"Neither Ray nor Spark installed, setting n_concurrent_trials to 1."
|
||||
)
|
||||
n_concurrent_trials = 1
|
||||
|
||||
self._state.n_jobs = n_jobs
|
||||
self._n_concurrent_trials = n_concurrent_trials
|
||||
|
@ -90,7 +90,8 @@ class BlendSearch(Searcher):
|
||||
needing to re-compute the trial. Must be the same or shorter length than
|
||||
points_to_evaluate. When provided, `mode` must be specified.
|
||||
time_budget_s: int or float | Time budget in seconds.
|
||||
num_samples: int | The number of configs to try.
|
||||
num_samples: int | The number of configs to try. -1 means no limit on the
|
||||
number of configs to try.
|
||||
resource_attr: A string to specify the resource dimension and the best
|
||||
performance is assumed to be at the max_resource.
|
||||
min_resource: A float of the minimal resource to use for the resource_attr.
|
||||
@ -222,11 +223,12 @@ class BlendSearch(Searcher):
|
||||
else:
|
||||
gs_space = space
|
||||
gs_seed = seed - 10 if (seed - 10) >= 0 else seed - 11 + (1 << 32)
|
||||
self._gs_seed = gs_seed
|
||||
if experimental:
|
||||
import optuna as ot
|
||||
|
||||
sampler = ot.samplers.TPESampler(
|
||||
seed=seed, multivariate=True, group=True
|
||||
seed=gs_seed, multivariate=True, group=True
|
||||
)
|
||||
else:
|
||||
sampler = None
|
||||
@ -306,7 +308,7 @@ class BlendSearch(Searcher):
|
||||
space=self._gs._space,
|
||||
metric=metric,
|
||||
mode=mode,
|
||||
sampler=self._gs._sampler,
|
||||
seed=self._gs_seed,
|
||||
)
|
||||
self._gs.space = self._ls.space
|
||||
self._init_search()
|
||||
@ -322,11 +324,12 @@ class BlendSearch(Searcher):
|
||||
self.cost_attr = self._ls.cost_attr = TIME_TOTAL_S
|
||||
if "metric_target" in spec:
|
||||
self._metric_target = spec.get("metric_target")
|
||||
if "num_samples" in spec:
|
||||
num_samples = spec.get("num_samples")
|
||||
if num_samples is not None:
|
||||
self._num_samples = (
|
||||
spec["num_samples"]
|
||||
+ len(self._result)
|
||||
+ len(self._trial_proposed_by)
|
||||
(num_samples + len(self._result) + len(self._trial_proposed_by))
|
||||
if num_samples > 0 # 0 is currently treated the same as -1
|
||||
else num_samples
|
||||
)
|
||||
return True
|
||||
|
||||
|
@ -547,8 +547,8 @@ def complete_config(
|
||||
domain.categories[index],
|
||||
flow2,
|
||||
disturb,
|
||||
lower and lower[key][index],
|
||||
upper and upper[key][index],
|
||||
lower and lower.get(key) and lower[key][index],
|
||||
upper and upper.get(key) and upper[key][index],
|
||||
)
|
||||
assert (
|
||||
"_choice_" not in subspace[key]
|
||||
@ -560,8 +560,8 @@ def complete_config(
|
||||
space[key],
|
||||
flow2,
|
||||
disturb,
|
||||
lower and lower[key],
|
||||
upper and upper[key],
|
||||
lower and lower.get(key),
|
||||
upper and upper.get(key),
|
||||
)
|
||||
continue
|
||||
subspace[key] = domain
|
||||
|
@ -31,23 +31,21 @@ def check_spark():
|
||||
Return (True, None) if the check passes, otherwise log the exception message and
|
||||
return (False, Exception(msg)). The exception can be raised by the caller.
|
||||
"""
|
||||
logger.warning("\ncheck Spark installation...This line should appear only once.\n")
|
||||
logger.debug("\ncheck Spark installation...This line should appear only once.\n")
|
||||
if not _have_spark:
|
||||
msg = """use_spark=True requires installation of PySpark. Please run pip install flaml[spark]
|
||||
and check [here](https://spark.apache.org/docs/latest/api/python/getting_started/install.html)
|
||||
for more details about installing Spark."""
|
||||
logger.warning(msg)
|
||||
return False, ImportError(msg)
|
||||
|
||||
if _spark_major_minor_version[0] < 3:
|
||||
msg = "Spark version must be >= 3.0 to use flaml[spark]"
|
||||
logger.warning(msg)
|
||||
return False, ImportError(msg)
|
||||
|
||||
try:
|
||||
SparkSession.builder.getOrCreate()
|
||||
except RuntimeError as e:
|
||||
logger.warning(f"\nSparkSession is not available: {e}\n")
|
||||
# logger.warning(f"\nSparkSession is not available: {e}\n")
|
||||
return False, RuntimeError(e)
|
||||
|
||||
return True, None
|
||||
|
@ -30,7 +30,7 @@ class SimpleTrial(Trial):
|
||||
self.config = config or {}
|
||||
self.status = Trial.PENDING
|
||||
self.start_time = None
|
||||
self.last_result = {}
|
||||
self.last_result = None
|
||||
self.last_update_time = -float("inf")
|
||||
self.custom_trial_name = None
|
||||
self.trainable_name = "trainable"
|
||||
|
@ -470,7 +470,9 @@ def run(
|
||||
logger.addHandler(old_handlers[0])
|
||||
if verbose > 0:
|
||||
if log_file_name:
|
||||
os.makedirs(os.path.dirname(log_file_name), exist_ok=True)
|
||||
dir_name = os.path.dirname(log_file_name)
|
||||
if dir_name:
|
||||
os.makedirs(dir_name, exist_ok=True)
|
||||
logger.addHandler(logging.FileHandler(log_file_name))
|
||||
elif not logger.hasHandlers():
|
||||
# Add the console handler.
|
||||
@ -811,6 +813,10 @@ def run(
|
||||
report(_metric=result)
|
||||
_runner.stop_trial(trial_to_run)
|
||||
num_failures = 0
|
||||
if trial_to_run.last_result is None:
|
||||
# application stops tuning by returning None
|
||||
# TODO document this feature when it is finalized
|
||||
break
|
||||
else:
|
||||
# break with upperbound_num_failures consecutive failures
|
||||
num_failures += 1
|
||||
|
@ -1 +1 @@
|
||||
__version__ = "1.1.0"
|
||||
__version__ = "1.1.2"
|
||||
|
@ -1,6 +1,7 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"slideshow": {
|
||||
@ -38,10 +39,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install flaml[notebook]\n",
|
||||
"# From v0.6.6, catboost is made an optional dependency to build conda package.\n",
|
||||
"# To install catboost, you can run:\n",
|
||||
"# %pip install flaml[catboost]"
|
||||
"# %pip install flaml[notebook]"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -749,7 +747,8 @@
|
||||
"xgb = XGBClassifier()\n",
|
||||
"cat_columns = X_train.select_dtypes(include=['category']).columns\n",
|
||||
"X = X_train.copy()\n",
|
||||
"X[cat_columns] = X[cat_columns].apply(lambda x: x.cat.codes)\n"
|
||||
"X[cat_columns] = X[cat_columns].apply(lambda x: x.cat.codes)\n",
|
||||
"y_train_xgb = y_train.astype(\"int\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -758,7 +757,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"xgb.fit(X, y_train)"
|
||||
"xgb.fit(X, y_train_xgb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -769,7 +768,8 @@
|
||||
"source": [
|
||||
"X = X_test.copy()\n",
|
||||
"X[cat_columns] = X[cat_columns].apply(lambda x: x.cat.codes)\n",
|
||||
"y_pred_xgb = xgb.predict(X)"
|
||||
"y_pred_xgb = xgb.predict(X)\n",
|
||||
"y_test_xgb = y_test.astype(\"int\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -788,7 +788,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print('default xgboost accuracy', '=', 1 - sklearn_metric_loss_score('accuracy', y_pred_xgb, y_test))\n",
|
||||
"print('default xgboost accuracy', '=', 1 - sklearn_metric_loss_score('accuracy', y_pred_xgb, y_test_xgb))\n",
|
||||
"print('default lgbm accuracy', '=', 1 - sklearn_metric_loss_score('accuracy', y_pred_lgbm, y_test))\n",
|
||||
"print('flaml (10 min) accuracy', '=', 1 - sklearn_metric_loss_score('accuracy', y_pred, y_test))"
|
||||
]
|
||||
@ -1283,7 +1283,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.15"
|
||||
"version": "3.9.15 (main, Oct 26 2022, 03:47:43) \n[GCC 10.2.1 20210110]"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
|
@ -28,7 +28,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -U flaml openml;"
|
||||
"# %pip install -U flaml openml;"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -520,7 +520,7 @@
|
||||
" \"task\": \"regression\",\n",
|
||||
" \"starting_points\": \"data\",\n",
|
||||
" \"estimator_list\": [\"lgbm\"],\n",
|
||||
" \"time_budget\": 600,\n",
|
||||
" \"time_budget\": 300,\n",
|
||||
"}\n",
|
||||
"automl.fit(X_train, y_train, **settings)"
|
||||
]
|
||||
@ -545,7 +545,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.7"
|
||||
"version": "3.9.15 (main, Oct 26 2022, 03:47:43) \n[GCC 10.2.1 20210110]"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
1
setup.py
1
setup.py
@ -75,6 +75,7 @@ setuptools.setup(
|
||||
"joblibspark>=0.5.0",
|
||||
"nbconvert",
|
||||
"nbformat",
|
||||
"ipykernel",
|
||||
],
|
||||
"catboost": ["catboost>=0.26"],
|
||||
"blendsearch": ["optuna==2.8.0"],
|
||||
|
45
test/automl/test_notebook.py
Normal file
45
test/automl/test_notebook.py
Normal file
@ -0,0 +1,45 @@
|
||||
import nbformat
|
||||
from nbconvert.preprocessors import ExecutePreprocessor
|
||||
from nbconvert.preprocessors import CellExecutionError
|
||||
import os
|
||||
import sys
|
||||
import pytest
|
||||
|
||||
|
||||
here = os.path.abspath(os.path.dirname(__file__))
|
||||
|
||||
|
||||
def run_notebook(input_nb, output_nb="executed_notebook.ipynb", save=False):
    """Execute a notebook located in the repository's ``notebook`` directory.

    Args:
        input_nb: File name of the notebook, resolved relative to
            ``<this test dir>/../../notebook``.
        output_nb: File name (relative to this test directory) used when the
            executed notebook is written back to disk.
        save: When true, write the notebook to ``output_nb`` after execution,
            including the partially executed state if a cell failed.

    Raises:
        CellExecutionError: Propagated unchanged when a notebook cell fails.
    """
    # Stays None if the notebook could not be opened or parsed, so the
    # ``finally`` clause below never touches an unbound name and never hides
    # the original error behind a NameError.
    nb = None
    try:
        file_path = os.path.join(here, os.pardir, os.pardir, "notebook", input_nb)
        # Read with the same explicit encoding that is used for writing below,
        # instead of depending on the platform's default locale encoding.
        with open(file_path, encoding="utf-8") as f:
            nb = nbformat.read(f, as_version=4)
        ep = ExecutePreprocessor(timeout=3600, kernel_name="python3")
        ep.preprocess(nb, {"metadata": {"path": here}})
    finally:
        # Any exception (including CellExecutionError) still propagates after
        # this clause; saving is best-effort for post-mortem inspection.
        if save and nb is not None:
            with open(os.path.join(here, output_nb), "w", encoding="utf-8") as f:
                nbformat.write(nb, f)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    # Compare the parsed (major, minor) tuple: a substring test against
    # ``sys.version`` can also match unrelated parts of the banner, such as
    # the compiler version.
    sys.platform != "darwin" or sys.version_info[:2] != (3, 8),
    reason="Only run on macOS with Python 3.8",
)
def test_automl_classification(save=False):
    """Run the ``automl_classification.ipynb`` notebook end to end.

    Args:
        save: Forwarded to ``run_notebook``; when true the executed notebook
            is written to disk.
    """
    run_notebook("automl_classification.ipynb", save=save)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    # Compare the parsed (major, minor) tuple: a substring test against
    # ``sys.version`` can also match unrelated parts of the banner, such as
    # the compiler version.
    sys.platform != "darwin" or sys.version_info[:2] != (3, 7),
    reason="Only run on macOS with Python 3.7",
)
def test_zeroshot_lightgbm(save=False):
    """Run the ``zeroshot_lightgbm.ipynb`` notebook end to end.

    Args:
        save: Forwarded to ``run_notebook``; when true the executed notebook
            is written to disk.
    """
    run_notebook("zeroshot_lightgbm.ipynb", save=save)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# test_automl_classification(save=True)
|
||||
test_zeroshot_lightgbm(save=True)
|
@ -25,8 +25,8 @@ def run_notebook(input_nb, output_nb="executed_notebook.ipynb", save=False):
|
||||
ep.preprocess(nb, {"metadata": {"path": here}})
|
||||
except CellExecutionError:
|
||||
raise
|
||||
except Exception as e:
|
||||
print("\nIgnoring below error:\n", e, "\n\n")
|
||||
# except Exception as e:
|
||||
# print("\nIgnoring below error:\n", e, "\n\n")
|
||||
finally:
|
||||
if save:
|
||||
with open(os.path.join(here, output_nb), "w", encoding="utf-8") as f:
|
||||
|
@ -34,7 +34,7 @@ def test_tune(externally_setup_searcher=False, use_ray=False, use_raytune=False)
|
||||
"width": tune.uniform(0, 20),
|
||||
"height": tune.uniform(-100, 100),
|
||||
}
|
||||
if externally_setup_searcher:
|
||||
if externally_setup_searcher is True:
|
||||
|
||||
searcher = BlendSearch(
|
||||
space=search_space,
|
||||
@ -84,8 +84,10 @@ def test_tune(externally_setup_searcher=False, use_ray=False, use_raytune=False)
|
||||
metric="mean_loss",
|
||||
mode="min",
|
||||
)
|
||||
else:
|
||||
elif externally_setup_searcher is False:
|
||||
searcher = None
|
||||
else:
|
||||
searcher = externally_setup_searcher
|
||||
|
||||
analysis = tune.run(
|
||||
easy_objective_custom_tune,
|
||||
@ -120,5 +122,19 @@ def test_reproducibility():
|
||||
), "flaml.tune not reproducible when the searcher is set up externally"
|
||||
|
||||
|
||||
def test_gs_reproducibility():
    """Check that two identically configured BlendSearch runs agree.

    Each run uses a fresh ``BlendSearch`` over the same one-dimensional
    integer space and an objective that always reports ``m = 0.35``; the
    config of the last trial must be the same in both runs.
    """
    from flaml import BlendSearch, tune

    def f(config):
        return {"m": 0.35}

    search_space = {"a": tune.randint(1, 100)}

    # Run the same experiment twice, building a new searcher for each run.
    analyses = []
    for _ in range(2):
        searcher = BlendSearch(space=search_space, cost_attr=None)
        analyses.append(
            tune.run(f, search_alg=searcher, num_samples=2, metric="m", mode="max")
        )

    first, second = analyses
    assert first.trials[-1].config == second.trials[-1].config
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_reproducibility()
|
||||
|
@ -66,6 +66,14 @@ def test_define_by_run():
|
||||
cfo.suggest(f"t{i}")
|
||||
# print(normalize(config, bs._gs.space, config, {}, False))
|
||||
print(complete_config({}, cfo._ls.space, cfo._ls))
|
||||
# test hierarchical space with low_cost_partial_config
|
||||
bs = BlendSearch(
|
||||
space={"c": tune.choice([0, choice]), "randn": tune.randn(10, 2)},
|
||||
low_cost_partial_config={"randn": 10},
|
||||
metric="metric",
|
||||
mode="max",
|
||||
)
|
||||
tune.run(lambda config: {"metric": 1}, search_alg=bs)
|
||||
|
||||
|
||||
def test_grid():
|
||||
|
25
test/tune/test_stop.py
Normal file
25
test/tune/test_stop.py
Normal file
@ -0,0 +1,25 @@
|
||||
from flaml import tune
|
||||
|
||||
n_trials = 0
|
||||
|
||||
|
||||
def evaluate_config(config):
    """Score ``config`` and return ``None`` from the 10th call onward.

    Increments the module-level ``n_trials`` counter on every call. While the
    counter is below 10, returns ``(round(x) - 85000) ** 2 - x / y`` computed
    from ``config["x"]`` and ``config["y"]``.
    """
    global n_trials
    n_trials += 1
    if n_trials < 10:
        x = config["x"]
        y = config["y"]
        return (round(x) - 85000) ** 2 - x / y
    # Tenth call or later: report no result.
    return None
|
||||
|
||||
|
||||
def test_eval_stop():
    """Tune ``evaluate_config`` with a 100-sample budget and expect 10 trials."""
    search_space = {
        "x": tune.qloguniform(lower=1, upper=100000, q=1),
        "y": tune.qlograndint(lower=2, upper=100000, q=2),
    }
    analysis = tune.run(
        evaluate_config,
        config=search_space,
        num_samples=100,
        mode="max",
    )
    trial_count = len(analysis.trials)
    assert trial_count == 10
|
@ -5085,9 +5085,9 @@ json-schema-traverse@^1.0.0:
|
||||
integrity sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==
|
||||
|
||||
json5@^2.1.2, json5@^2.2.1:
|
||||
version "2.2.1"
|
||||
resolved "https://registry.npmmirror.com/json5/-/json5-2.2.1.tgz#655d50ed1e6f95ad1a3caababd2b0efda10b395c"
|
||||
integrity sha512-1hqLFMSrGHRHxav9q9gNjJ5EXznIxGVO09xQRrwplcS8qs28pZ8s8hupZAmqDwZUmVZ2Qb2jnyPOWcDH8m8dlA==
|
||||
version "2.2.3"
|
||||
resolved "https://registry.yarnpkg.com/json5/-/json5-2.2.3.tgz#78cd6f1a19bdc12b73db5ad0c61efd66c1e29283"
|
||||
integrity sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==
|
||||
|
||||
jsonfile@^6.0.1:
|
||||
version "6.1.0"
|
||||
|
Loading…
x
Reference in New Issue
Block a user