autogen/test/spark/test_overtime.py
Jirka Borovec a701cd82f8
set black with 120 line length (#975)
* set black with 120 line length

* apply pre-commit

* apply black
2023-04-10 19:50:40 +00:00

67 lines
2.0 KiB
Python

import os
import time
import numpy as np
import pytest
from sklearn.datasets import load_iris
from flaml import AutoML
try:
from test.spark.custom_mylearner import *
except ImportError:
from custom_mylearner import *
try:
import pyspark
from flaml.tune.spark.utils import check_spark
from flaml.tune.spark.mylearner import lazy_metric
os.environ["FLAML_MAX_CONCURRENT"] = "10"
spark = pyspark.sql.SparkSession.builder.appName("App4OvertimeTest").getOrCreate()
spark_available, _ = check_spark()
skip_spark = not spark_available
except ImportError:
skip_spark = True
pytestmark = pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.")
def test_overtime():
time_budget = 15
df, y = load_iris(return_X_y=True, as_frame=True)
df["label"] = y
automl_experiment = AutoML()
automl_settings = {
"dataframe": df,
"label": "label",
"time_budget": time_budget,
"eval_method": "cv",
"metric": lazy_metric,
"task": "classification",
"log_file_name": "test/iris_custom.log",
"log_training_metric": True,
"log_type": "all",
"n_jobs": 1,
"model_history": True,
"sample_weight": np.ones(len(y)),
"pred_time_limit": 1e-5,
"estimator_list": ["lgbm"],
"n_concurrent_trials": 2,
"use_spark": True,
"force_cancel": True,
}
start_time = time.time()
automl_experiment.fit(**automl_settings)
elapsed_time = time.time() - start_time
print("time budget: {:.2f}s, actual elapsed time: {:.2f}s".format(time_budget, elapsed_time))
# assert abs(elapsed_time - time_budget) < 5 # cancel assertion because github VM sometimes is super slow, causing the test to fail
print(automl_experiment.predict(df))
print(automl_experiment.model)
print(automl_experiment.best_iteration)
print(automl_experiment.best_estimator)
if __name__ == "__main__":
test_overtime()