autogen/test/nlp/test_autohf_tokenclassification.py
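
"""Tests for FLAML AutoML on the token-classification (NER) task.

Covers two labeling formats for the toy data: integer label ids resolved
through an explicit ``label_list``, and ready-made token label strings.
"""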

import sys
import pytest
import requests
import os
import shutil
from utils import (
    get_toy_data_tokenclassification_idlabel,
    get_toy_data_tokenclassification_tokenlabel,
    get_automl_settings,
)


@pytest.mark.skipif(
    sys.platform == "darwin" or sys.version_info < (3, 7),
    reason="do not run on mac os or py<3.7",
)
def test_tokenclassification_idlabel():
    from flaml import AutoML

    X_train, y_train, X_val, y_val = get_toy_data_tokenclassification_idlabel()
    automl = AutoML()
    automl_settings = get_automl_settings()
    automl_settings["task"] = "token-classification"
    # evaluate based on the overall_f1 reported by seqeval
    automl_settings["metric"] = "seqeval:overall_f1"
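    # the integer ids in y map to these BIO-scheme tags (the CoNLL-2003 NER tagset)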
automl_settings["fit_kwargs_by_estimator"]["transformer"]["label_list"] = [
"O",
"B-PER",
"I-PER",
"B-ORG",
"I-ORG",
"B-LOC",
"I-LOC",
"B-MISC",
"I-MISC",
]
    try:
        automl.fit(
            X_train=X_train,
            y_train=y_train,
            X_val=X_val,
            y_val=y_val,
            **automl_settings
        )
    except requests.exceptions.HTTPError:
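        # an HTTPError here is most likely a transient failure while downloading
        # the pretrained model, so bail out instead of failing the test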
        return
    # perf test: the reported validation_loss should equal the smallest
    # intermediate eval_automl_metric logged for that trial
    import json

    with open("seqclass.log", "r") as fin:
        for line in fin:
            each_log = json.loads(line.strip("\n"))
            if "validation_loss" in each_log:
                val_loss = each_log["validation_loss"]
                min_inter_result = min(
                    each_dict.get("eval_automl_metric", sys.maxsize)
                    for each_dict in each_log["logged_metric"]["intermediate_results"]
                )
                if min_inter_result != sys.maxsize:
                    assert val_loss == min_inter_result
    if os.path.exists("test/data/output/"):
        shutil.rmtree("test/data/output/")


@pytest.mark.skipif(
    sys.platform == "darwin" or sys.version_info < (3, 7),
    reason="do not run on mac os or py<3.7",
)
def test_tokenclassification_tokenlabel():
    from flaml import AutoML

    X_train, y_train, X_val, y_val = get_toy_data_tokenclassification_tokenlabel()
    automl = AutoML()
    automl_settings = get_automl_settings()
    automl_settings["task"] = "token-classification"
    # evaluate based on the overall_f1 reported by seqeval
    automl_settings["metric"] = "seqeval:overall_f1"
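    # no label_list in this variant: y is expected to already contain the tag
    # strings themselves rather than integer ids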
    try:
        automl.fit(
            X_train=X_train,
            y_train=y_train,
            X_val=X_val,
            y_val=y_val,
            **automl_settings
        )
    except requests.exceptions.HTTPError:
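        # an HTTPError here is most likely a transient failure while downloading
        # the pretrained model, so bail out instead of failing the test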
        return
    # perf test: the reported validation_loss should equal the smallest
    # intermediate eval_automl_metric logged for that trial
    import json

    with open("seqclass.log", "r") as fin:
        for line in fin:
            each_log = json.loads(line.strip("\n"))
            if "validation_loss" in each_log:
                val_loss = each_log["validation_loss"]
                min_inter_result = min(
                    each_dict.get("eval_automl_metric", sys.maxsize)
                    for each_dict in each_log["logged_metric"]["intermediate_results"]
                )
                if min_inter_result != sys.maxsize:
                    assert val_loss == min_inter_result
    if os.path.exists("test/data/output/"):
        shutil.rmtree("test/data/output/")


if __name__ == "__main__":
    test_tokenclassification_idlabel()