# autogen/test/nlp/test_autohf_custom_metric.py

import sys
import pytest


def custom_metric(
    X_test,
    y_test,
    estimator,
    labels,
    X_train,
    y_train,
    weight_test=None,
    weight_train=None,
    config=None,
    groups_test=None,
    groups_train=None,
):
    """Evaluate ``estimator`` on the test split with its Hugging Face trainer.

    The signature follows the shape FLAML passes to a user-supplied metric
    callable; only ``X_test``, ``y_test`` and ``estimator`` are read here, the
    remaining parameters are accepted and ignored.

    Args:
        X_test: Features to evaluate on; passed through
            ``estimator._preprocess`` before building the dataset.
        y_test: Labels for ``X_test``. When ``None`` the evaluation dataset
            is built from the preprocessed features alone.
        estimator: Estimator exposing ``_trainer``, ``_metric``, ``_task``,
            ``_preprocess`` and ``_init_model_for_predict`` (presumably a
            ``TransformersEstimator`` -- confirm against the caller).
        labels, X_train, y_train, weight_test, weight_train, config,
        groups_test, groups_train: Unused.

    Returns:
        A ``(loss, metrics)`` tuple where ``metrics`` is whatever
        ``trainer.evaluate`` returned and ``loss`` is its
        ``"eval_val_loss"`` entry.

    Raises:
        KeyError: If the evaluation result has no ``"eval_val_loss"`` key.
    """
    from datasets import Dataset
    from flaml.model import TransformersEstimator
    from flaml.nlp.utils import load_default_huggingface_metric_for_task

    trainer = estimator._trainer
    if trainer is None:
        # No trainer is attached, so build one for prediction, borrow it for
        # this evaluation, and put the estimator back in its trainer-less
        # state.
        estimator._init_model_for_predict(X_test)
        trainer = estimator._trainer
        estimator._trainer = None

    # Preprocessing is identical whether or not labels are available.
    X_test, _ = estimator._preprocess(X_test)
    if y_test is not None:
        eval_frame = TransformersEstimator._join(X_test, y_test)
    else:
        eval_frame = X_test
    eval_dataset = Dataset.from_pandas(eval_frame)

    # Temporarily swap in the task's default Hugging Face metric while the
    # trainer evaluates.
    saved_metric = estimator._metric
    estimator._metric = load_default_huggingface_metric_for_task(estimator._task)
    try:
        metrics = trainer.evaluate(eval_dataset)
    finally:
        # Restore the caller's metric even when evaluation raises, so the
        # estimator is never left carrying the temporary metric.
        estimator._metric = saved_metric

    return metrics["eval_val_loss"], metrics


@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
def test_custom_metric():
    """Fit AutoML twice on a 4-row GLUE/MRPC sample using ``custom_metric``.

    The first fit runs with ``max_iter=1`` and the second with ``max_iter=3``.
    If loading the dataset raises ``requests.exceptions.ConnectionError`` the
    function returns early without fitting.
    """
    from flaml import AutoML
    import requests
    from datasets import load_dataset

    def load_mrpc_head():
        # First four rows of the MRPC "train" split as a pandas DataFrame.
        return load_dataset("glue", "mrpc", split="train").to_pandas().iloc[0:4]

    try:
        train_dataset = load_mrpc_head()
        # The dev data is drawn from the same "train" split as the train data.
        dev_dataset = load_mrpc_head()
    except requests.exceptions.ConnectionError:
        # Dataset could not be fetched: bail out quietly.
        return

    sentence_columns = ["sentence1", "sentence2"]
    label_column = "label"

    fit_data = {
        "X_train": train_dataset[sentence_columns],
        "y_train": train_dataset[label_column],
        "X_val": dev_dataset[sentence_columns],
        "y_val": dev_dataset[label_column],
    }

    automl = AutoML()

    # First pass: max_iter=1 (per the original note, this exercises the
    # retrain-only path without HPO).
    automl_settings = {
        "gpu_per_trial": 0,
        "max_iter": 1,
        "time_budget": 5,
        "task": "seq-classification",
        "metric": custom_metric,
        "log_file_name": "seqclass.log",
        "custom_hpo_args": {
            "model_path": "google/electra-small-discriminator",
            "output_dir": "data/output/",
            "ckpt_per_epoch": 5,
            "fp16": False,
        },
    }
    automl.fit(**fit_data, **automl_settings)

    # Second pass: max_iter=3 (per the original note, this exercises the
    # custom metric being called from
    # TransformersEstimator._compute_metrics_by_dataset_name).
    automl_settings["max_iter"] = 3
    automl.fit(**fit_data, **automl_settings)

    del automl


# Allow running this test directly as a script, without the pytest runner.
if __name__ == "__main__":
    test_custom_metric()
adding TODOs for NLP module, so students can implement other tasks easier (#321) * fixing ray pickle bug, skipping macosx bug, completing code for seqregression * catching connectionerror * ading TODOs for NLP module 2021-12-03 12:45:16 -05:00			`import sys`
bug fix for TransformerEstimator (#293) * fix checkpoint naming + trial id for non-ray mode, fix the bug in running test mode, delete all the checkpoints in non-ray mode * finished testing for checkpoint naming, delete checkpoint, ray, max iter = 1 * adding predict_proba, address PR 293's comments close #293 #291 2021-11-23 14:26:39 -05:00			`import pytest`


fixing custom metric (#357) * fixing the error for custom metric 2021-12-24 16:23:09 -05:00			`def custom_metric(`
Fixing the bug in custom metric (#356) * fixing the bug for custom metric 2021-12-23 18:44:53 -05:00			`X_test,`
			`y_test,`
			`estimator,`
			`labels,`
			`X_train,`
			`y_train,`
			`weight_test=None,`
			`weight_train=None,`
			`config=None,`
			`groups_test=None,`
			`groups_train=None,`
			`):`
fixing custom metric (#357) * fixing the error for custom metric 2021-12-24 16:23:09 -05:00			`from datasets import Dataset`
			`from flaml.model import TransformersEstimator`
Logging multiple checkpoints (#394) 2022-01-12 22:50:39 -05:00			`from flaml.nlp.utils import load_default_huggingface_metric_for_task`
fixing custom metric (#357) * fixing the error for custom metric 2021-12-24 16:23:09 -05:00
serialize TransformerEstimator (#381) * serialize TransformerEstimator * check has_attr * custom metric needs trainer * skip test on mac 2022-01-06 10:28:19 -08:00			`if estimator._trainer is None:`
			`estimator._init_model_for_predict(X_test)`
			`trainer = estimator._trainer`
			`estimator._trainer = None`
			`else:`
			`trainer = estimator._trainer`
fixing custom metric (#357) * fixing the error for custom metric 2021-12-24 16:23:09 -05:00			`if y_test is not None:`
			`X_test, _ = estimator._preprocess(X_test)`
			`eval_dataset = Dataset.from_pandas(TransformersEstimator._join(X_test, y_test))`
			`else:`
			`X_test, _ = estimator._preprocess(X_test)`
			`eval_dataset = Dataset.from_pandas(X_test)`

Logging multiple checkpoints (#394) 2022-01-12 22:50:39 -05:00			`estimator_metric_cache = estimator._metric`
			`estimator._metric = load_default_huggingface_metric_for_task(estimator._task)`
fixing custom metric (#357) * fixing the error for custom metric 2021-12-24 16:23:09 -05:00
			`metrics = trainer.evaluate(eval_dataset)`
Logging multiple checkpoints (#394) 2022-01-12 22:50:39 -05:00			`estimator._metric = estimator_metric_cache`

			`return metrics["eval_val_loss"], metrics`
Fixing the bug in custom metric (#356) * fixing the bug for custom metric 2021-12-23 18:44:53 -05:00

adding TODOs for NLP module, so students can implement other tasks easier (#321) * fixing ray pickle bug, skipping macosx bug, completing code for seqregression * catching connectionerror * ading TODOs for NLP module 2021-12-03 12:45:16 -05:00			`@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")`
Fixing the bug in custom metric (#356) * fixing the bug for custom metric 2021-12-23 18:44:53 -05:00			`def test_custom_metric():`
bug fix for TransformerEstimator (#293) * fix checkpoint naming + trial id for non-ray mode, fix the bug in running test mode, delete all the checkpoints in non-ray mode * finished testing for checkpoint naming, delete checkpoint, ray, max iter = 1 * adding predict_proba, address PR 293's comments close #293 #291 2021-11-23 14:26:39 -05:00			`from flaml import AutoML`
adding TODOs for NLP module, so students can implement other tasks easier (#321) * fixing ray pickle bug, skipping macosx bug, completing code for seqregression * catching connectionerror * ading TODOs for NLP module 2021-12-03 12:45:16 -05:00			`import requests`
bug fix for TransformerEstimator (#293) * fix checkpoint naming + trial id for non-ray mode, fix the bug in running test mode, delete all the checkpoints in non-ray mode * finished testing for checkpoint naming, delete checkpoint, ray, max iter = 1 * adding predict_proba, address PR 293's comments close #293 #291 2021-11-23 14:26:39 -05:00			`from datasets import load_dataset`

adding TODOs for NLP module, so students can implement other tasks easier (#321) * fixing ray pickle bug, skipping macosx bug, completing code for seqregression * catching connectionerror * ading TODOs for NLP module 2021-12-03 12:45:16 -05:00			`try:`
			`train_dataset = (`
			`load_dataset("glue", "mrpc", split="train").to_pandas().iloc[0:4]`
			`)`
			`dev_dataset = load_dataset("glue", "mrpc", split="train").to_pandas().iloc[0:4]`
			`except requests.exceptions.ConnectionError:`
			`return`
bug fix for TransformerEstimator (#293) * fix checkpoint naming + trial id for non-ray mode, fix the bug in running test mode, delete all the checkpoints in non-ray mode * finished testing for checkpoint naming, delete checkpoint, ray, max iter = 1 * adding predict_proba, address PR 293's comments close #293 #291 2021-11-23 14:26:39 -05:00
			`custom_sent_keys = ["sentence1", "sentence2"]`
			`label_key = "label"`

			`X_train = train_dataset[custom_sent_keys]`
			`y_train = train_dataset[label_key]`

			`X_val = dev_dataset[custom_sent_keys]`
			`y_val = dev_dataset[label_key]`

			`automl = AutoML()`

Fixing the bug in custom metric (#356) * fixing the bug for custom metric 2021-12-23 18:44:53 -05:00			`# testing when max_iter=1 and do retrain only without hpo`
bug fix for TransformerEstimator (#293) * fix checkpoint naming + trial id for non-ray mode, fix the bug in running test mode, delete all the checkpoints in non-ray mode * finished testing for checkpoint naming, delete checkpoint, ray, max iter = 1 * adding predict_proba, address PR 293's comments close #293 #291 2021-11-23 14:26:39 -05:00
			`automl_settings = {`
			`"gpu_per_trial": 0,`
			`"max_iter": 1,`
			`"time_budget": 5,`
			`"task": "seq-classification",`
fixing custom metric (#357) * fixing the error for custom metric 2021-12-24 16:23:09 -05:00			`"metric": custom_metric,`
bug fix for TransformerEstimator (#293) * fix checkpoint naming + trial id for non-ray mode, fix the bug in running test mode, delete all the checkpoints in non-ray mode * finished testing for checkpoint naming, delete checkpoint, ray, max iter = 1 * adding predict_proba, address PR 293's comments close #293 #291 2021-11-23 14:26:39 -05:00			`"log_file_name": "seqclass.log",`
			`}`

			`automl_settings["custom_hpo_args"] = {`
			`"model_path": "google/electra-small-discriminator",`
			`"output_dir": "data/output/",`
			`"ckpt_per_epoch": 5,`
			`"fp16": False,`
			`}`

			`automl.fit(`
			`X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings`
			`)`
Fixing the bug in custom metric (#356) * fixing the bug for custom metric 2021-12-23 18:44:53 -05:00
			`# testing calling custom metric in TransformersEstimator._compute_metrics_by_dataset_name`

			`automl_settings["max_iter"] = 3`
			`automl.fit(`
			`X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings`
			`)`

bug fix for TransformerEstimator (#293) * fix checkpoint naming + trial id for non-ray mode, fix the bug in running test mode, delete all the checkpoints in non-ray mode * finished testing for checkpoint naming, delete checkpoint, ray, max iter = 1 * adding predict_proba, address PR 293's comments close #293 #291 2021-11-23 14:26:39 -05:00			`del automl`
fixing custom metric (#357) * fixing the error for custom metric 2021-12-24 16:23:09 -05:00

			`if __name__ == "__main__":`
			`test_custom_metric()`