autogen/test/test_split.py
Kevin Chen 3d0a3d26a2
Forecast (#162)
* added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax']

* update setup.py

* add TimeSeriesSplit to 'regression' and 'classification' task

* add 'time' split_type for 'classification' and 'regression' task

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>

* feature importance

* variable name

* Update test/test_split.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* Update test/test_forecast.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* prophet installation fails on Windows

* upload flaml_forecast.ipynb

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>
2021-08-23 13:26:46 -07:00

79 lines
2.2 KiB
Python

import unittest
from sklearn.datasets import fetch_openml
from flaml.automl import AutoML
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Name of the OpenML dataset the tests below try to fetch; it is also
# interpolated into the log file name passed to AutoML.
dataset = "credit-g"
def _test(split_type):
    """Smoke-test ``AutoML.fit``/``predict`` on a classification task.

    Loads the OpenML dataset named by the module-level ``dataset``; if the
    download raises ``ArffException`` or ``ValueError`` the wine dataset
    bundled with scikit-learn is used instead. One third of the rows is held
    out, AutoML is fitted on the remainder and the hold-out accuracy is
    printed (nothing is asserted).

    Args:
        split_type: Value forwarded to AutoML as the ``"split_type"`` setting.
            When it equals ``'time'`` the hold-out split is made with
            ``shuffle=False``; otherwise it uses ``random_state=42``.
    """
    from sklearn.externals._arff import ArffException

    try:
        X, y = fetch_openml(name=dataset, return_X_y=True)
    except (ArffException, ValueError):
        # Fall back to a dataset that ships with scikit-learn.
        from sklearn.datasets import load_wine

        X, y = load_wine(return_X_y=True)

    # A time-based split only makes sense if the row order is preserved,
    # so shuffling is disabled in that case.
    if split_type == 'time':
        split_options = {"shuffle": False}
    else:
        split_options = {"random_state": 42}
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, **split_options
    )

    # No "metric" entry is passed, so AutoML falls back to its own default.
    settings = {
        "split_type": split_type,
        "task": 'classification',
        "time_budget": 2,
        "log_file_name": "test/{}.log".format(dataset),
        "log_training_metric": True,
        "model_history": True,
    }
    learner = AutoML()
    learner.fit(X_train=X_train, y_train=y_train, **settings)

    predictions = learner.predict(X_test)
    print(accuracy_score(y_test, predictions))
def _test_uniform():
    """Run the shared ``_test`` scenario with ``split_type="uniform"``.

    The leading underscore in the name keeps pytest from collecting it.
    """
    _test("uniform")
def test_time():
    """Run the shared ``_test`` scenario with ``split_type="time"``."""
    _test("time")
def test_groups():
    """Smoke-test ``AutoML.fit`` with cross-validation and a ``groups`` array.

    Loads the OpenML dataset named by the module-level ``dataset`` (falling
    back to scikit-learn's wine dataset when the fetch raises
    ``ArffException`` or ``ValueError``), assigns every sample a random group
    id in ``[0, 10)`` and fits AutoML on the full data. Nothing is asserted;
    the test passes as long as ``fit`` does not raise.
    """
    import numpy as np
    from sklearn.externals._arff import ArffException

    try:
        X, y = fetch_openml(name=dataset, return_X_y=True)
    except (ArffException, ValueError):
        # Fall back to a dataset that ships with scikit-learn.
        from sklearn.datasets import load_wine

        X, y = load_wine(return_X_y=True)

    # One group label per sample, drawn from the global NumPy RNG.
    group_ids = np.random.randint(low=0, high=10, size=len(y))

    settings = {
        "task": 'classification',
        "time_budget": 2,
        "eval_method": "cv",
        "groups": group_ids,
        "estimator_list": ['lgbm', 'rf', 'xgboost', 'kneighbor'],  # list of ML learners
        "learner_selector": "roundrobin",
        "log_file_name": "test/{}.log".format(dataset),
        "model_history": True,
    }
    learner = AutoML()
    learner.fit(X, y, **settings)
if __name__ == "__main__":
    # The tests in this module are plain functions, not ``unittest.TestCase``
    # subclasses, so a bare ``unittest.main()`` would discover and run
    # nothing. Wrap them explicitly so that executing the file as a script
    # actually exercises them. ``_test_uniform`` is intentionally left out,
    # matching its underscore-prefixed (non-collected) name.
    import sys

    suite = unittest.TestSuite(
        unittest.FunctionTestCase(func) for func in (test_time, test_groups)
    )
    result = unittest.TextTestRunner().run(suite)
    # Mirror ``unittest.main()``: non-zero exit status when a test fails.
    sys.exit(not result.wasSuccessful())