This PR fixes the frequent NLP bugs seen in other PRs (#647)

* fix NLP bug

* reset the model to electra-small

* remove model_path from fit_kwargs_by_estimator
Xueqing Liu 2022-07-25 17:46:33 -04:00 committed by GitHub
parent 89d9c4426e
commit 731afec9eb
3 changed files with 26 additions and 18 deletions
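
Taken together, the three files below move the model choice for the "*_ms" estimators out of fit_kwargs_by_estimator and pin it through custom_hp instead. A minimal before/after sketch of the settings shape; the task value is a placeholder assumption, not taken from the diff:

# Before this commit: the checkpoint was a fit-time kwarg, and tests
# deleted it by hand when they wanted zero-shot defaults.
automl_settings = {
    "task": "seq-classification",  # placeholder, not from the diff
    "fit_kwargs_by_estimator": {
        "transformer_ms": {
            "model_path": "google/electra-small-discriminator",
            "output_dir": "test/data/output/",
        }
    },
}

# After this commit: for "*_ms" estimators the kwarg is omitted and the
# checkpoint is pinned through custom_hp as a constant domain.
automl_settings = {
    "task": "seq-classification",  # placeholder, not from the diff
    "fit_kwargs_by_estimator": {
        "transformer_ms": {"output_dir": "test/data/output/"}
    },
    "custom_hp": {
        "transformer_ms": {
            "model_path": {"domain": "google/electra-small-discriminator"}
        }
    },
}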


@@ -4,7 +4,10 @@ import requests
 from utils import get_toy_data_summarization, get_automl_settings
-@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
+@pytest.mark.skipif(
+    sys.platform == "darwin" or sys.version < "3.7",
+    reason="do not run on mac os or py < 3.7",
+)
 def test_summarization():
     # TODO: manual test for how effective postprocess_seq2seq_prediction_label is
     from flaml import AutoML
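
One caveat on the new guard: sys.version is a free-form string, so the lexicographic comparison sys.version < "3.7" misorders later versions ("3.10.0 ..." sorts before "3.7" character by character, so the test would be skipped on Python 3.10+). A sketch of the more robust tuple comparison, using the same pytest marker:

import sys

import pytest


# sys.version_info is a comparable tuple, so (3, 10, ...) >= (3, 7)
# evaluates correctly, unlike the string comparison sys.version < "3.7".
@pytest.mark.skipif(
    sys.platform == "darwin" or sys.version_info < (3, 7),
    reason="do not run on mac os or py < 3.7",
)
def test_summarization():
    ...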


@@ -63,7 +63,6 @@ def test_starting_point_not_in_search_space():
         }
     }
     automl_settings["starting_points"] = "data:test/nlp/default/"
-    del automl_settings["fit_kwargs_by_estimator"][this_estimator_name]["model_path"]
     automl.fit(X_train, y_train, **automl_settings)
     assert len(automl._search_states[this_estimator_name].init_config) == len(
@@ -90,10 +89,13 @@ def test_points_to_evaluate():
     automl = AutoML()
     automl_settings = get_automl_settings(estimator_name="transformer_ms")
     automl_settings["estimator_list"] = ["transformer_ms"]
-    automl_settings["starting_points"] = "data"
-    del automl_settings["fit_kwargs_by_estimator"]["transformer_ms"]["model_path"]
+    automl_settings["starting_points"] = "data:test/nlp/default/"
+    automl_settings["custom_hp"] = {
+        "transformer_ms": {
+            "model_path": {"domain": "google/electra-small-discriminator"}
+        }
+    }
     automl.fit(X_train, y_train, **automl_settings)
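
The custom_hp entry added above works because a plain value in "domain" pins the hyperparameter to a constant, so every trial uses the same checkpoint. A sketch contrasting a pinned entry with a searchable one; the num_train_epochs entry is illustrative, not part of this diff:

from flaml import tune

custom_hp = {
    "transformer_ms": {
        # constant domain: every trial gets this exact checkpoint
        "model_path": {"domain": "google/electra-small-discriminator"},
        # tunable domain (illustrative): searched by the optimizer
        "num_train_epochs": {
            "domain": tune.choice([1, 2, 3]),
            "init_value": 1,
        },
    }
}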
@@ -109,8 +111,6 @@ def test_zero_shot_nomodel():
     automl_settings = get_automl_settings(estimator_name)
-    del automl_settings["fit_kwargs_by_estimator"][estimator_name]["model_path"]
     (
         hyperparams,
         estimator_class,
@@ -146,10 +146,6 @@ def test_build_error_portfolio(path="./test/nlp/default", strategy="greedy"):
     location = "test/nlp/default"
     X_train, y_train, X_val, y_val, X_test = get_toy_data_seqclassification()
     automl_settings = get_automl_settings(estimator_name)
-    del automl_settings["fit_kwargs_by_estimator"][estimator_name]["model_path"]
     try:
         (
             hyperparams,
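
The deleted del statements in this file all compensate for the same change in the utilities file below: once get_automl_settings stops inserting model_path for the "*_ms" estimators, there is nothing left for the tests to delete.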


@@ -1514,14 +1514,23 @@ def get_automl_settings(estimator_name="transformer"):
         "use_ray": False,
     }
-    automl_settings["fit_kwargs_by_estimator"] = {
-        estimator_name: {
-            "model_path": "google/electra-small-discriminator",
-            "output_dir": "test/data/output/",
-            "ckpt_per_epoch": 1,
-            "fp16": False,
-        }
-    }
+    if estimator_name.endswith("ms"):
+        automl_settings["fit_kwargs_by_estimator"] = {
+            estimator_name: {
+                "output_dir": "test/data/output/",
+                "ckpt_per_epoch": 1,
+                "fp16": False,
+            }
+        }
+    else:
+        automl_settings["fit_kwargs_by_estimator"] = {
+            estimator_name: {
+                "model_path": "google/electra-small-discriminator",
+                "output_dir": "test/data/output/",
+                "ckpt_per_epoch": 1,
+                "fp16": False,
+            }
+        }
     automl_settings["estimator_list"] = [estimator_name]
     return automl_settings
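
A small usage sketch of the branch above, importing the helper the same way the test files do:

from utils import get_automl_settings

# "*_ms" estimators get no model_path; it is supplied elsewhere
# (e.g., via custom_hp or the zero-shot defaults).
settings = get_automl_settings(estimator_name="transformer_ms")
assert "model_path" not in settings["fit_kwargs_by_estimator"]["transformer_ms"]

# every other estimator still gets the electra-small checkpoint
settings = get_automl_settings(estimator_name="transformer")
assert (
    settings["fit_kwargs_by_estimator"]["transformer"]["model_path"]
    == "google/electra-small-discriminator"
)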