diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 116a70261..f1098eb4d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,19 +1,28 @@ +default_language_version: + python: python3 + +ci: + autofix_prs: true + autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions' + autoupdate_schedule: 'quarterly' repos: - repo: https://github.com/psf/black - rev: 22.3.0 + rev: 23.1.0 hooks: - id: black - language_version: python3 - - repo: https://github.com/pycqa/flake8 - rev: 4.0.1 - hooks: - - id: flake8 + - repo: https://github.com/pycqa/flake8 + rev: 6.0.0 + hooks: + - id: flake8 - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.3.0 + rev: v4.4.0 hooks: - id: check-added-large-files - id: check-ast + - id: check-yaml + - id: check-toml + - id: check-json - id: check-byte-order-marker - id: check-merge-conflict - id: detect-private-key diff --git a/flaml/automl/automl.py b/flaml/automl/automl.py index 42576a8a2..a6497e9c7 100644 --- a/flaml/automl/automl.py +++ b/flaml/automl/automl.py @@ -1104,7 +1104,6 @@ class AutoML(BaseEstimator): groups_val=None, groups=None, ): - if X_train_all is not None and y_train_all is not None: assert ( isinstance(X_train_all, np.ndarray) @@ -1266,7 +1265,6 @@ class AutoML(BaseEstimator): self._state.groups = groups def _prepare_data(self, eval_method, split_ratio, n_splits): - X_val, y_val = self._state.X_val, self._state.y_val if issparse(X_val): X_val = X_val.tocsr() diff --git a/flaml/automl/ml.py b/flaml/automl/ml.py index dd17cffea..4d85679b4 100644 --- a/flaml/automl/ml.py +++ b/flaml/automl/ml.py @@ -439,7 +439,6 @@ def get_val_loss( fit_kwargs={}, free_mem_ratio=0, ): - start = time.time() # if groups_val is not None: # fit_kwargs['groups_val'] = groups_val diff --git a/flaml/automl/model.py b/flaml/automl/model.py index bccad6470..0d8b32c7d 100644 --- a/flaml/automl/model.py +++ b/flaml/automl/model.py @@ -175,7 +175,6 @@ class BaseEstimator: return X def _fit(self, X_train, y_train, **kwargs): - current_time = time.time() if "groups" in kwargs: kwargs = kwargs.copy() @@ -447,7 +446,7 @@ class TransformersEstimator(BaseEstimator): def _set_training_args(self, **kwargs): from .nlp.utils import date_str, Counter - for (key, val) in kwargs.items(): + for key, val in kwargs.items(): assert key not in self.params, ( "Since {} is in the search space, it cannot exist in 'custom_fit_kwargs' at the same time." "If you need to fix the value of {} to {}, the only way is to add a single-value domain in the search " diff --git a/flaml/automl/nlp/huggingface/training_args.py b/flaml/automl/nlp/huggingface/training_args.py index b064f51f3..7461b1caa 100644 --- a/flaml/automl/nlp/huggingface/training_args.py +++ b/flaml/automl/nlp/huggingface/training_args.py @@ -112,7 +112,6 @@ class TrainingArgumentsForAuto(TrainingArguments): @dataclass class Seq2SeqTrainingArgumentsForAuto(TrainingArgumentsForAuto): - model_path: str = field( default="t5-small", metadata={ diff --git a/flaml/automl/nlp/huggingface/utils.py b/flaml/automl/nlp/huggingface/utils.py index ba7558f09..afea65c10 100644 --- a/flaml/automl/nlp/huggingface/utils.py +++ b/flaml/automl/nlp/huggingface/utils.py @@ -135,7 +135,6 @@ def tokenize_and_align_labels( def tokenize_text_tokclassification(X, Y, tokenizer, hf_args=None): - # If the label_all_tokens flag is True, prepare two dicts label_to_id and b_to_i_label to convert the B- labels to I- labels label_to_id = {i: i for i in range(len(hf_args.label_list))} b_to_i_label = [] @@ -275,7 +274,6 @@ def tokenize_row( def tokenize_text_multiplechoice(X, tokenizer, hf_args=None): - t = X[["sent1", "sent2", "ending0", "ending1", "ending2", "ending3"]] _, tokenized_column_names = tokenize_swag( t.iloc[0], diff --git a/flaml/automl/nlp/utils.py b/flaml/automl/nlp/utils.py index 872c1d037..431bf4219 100644 --- a/flaml/automl/nlp/utils.py +++ b/flaml/automl/nlp/utils.py @@ -11,7 +11,6 @@ from flaml.automl.data import ( def load_default_huggingface_metric_for_task(task): - if task == SEQCLASSIFICATION: return "accuracy" elif task == SEQREGRESSION: diff --git a/flaml/tune/sample.py b/flaml/tune/sample.py index 66223ca1c..b59ced92d 100644 --- a/flaml/tune/sample.py +++ b/flaml/tune/sample.py @@ -647,5 +647,4 @@ def qrandn(mean: float, sd: float, q: float): def polynomial_expansion_set( init_monomials: set, highest_poly_order: int = None, allow_self_inter: bool = False ): - return PolynomialExpansionSet(init_monomials, highest_poly_order, allow_self_inter) diff --git a/flaml/tune/searcher/variant_generator.py b/flaml/tune/searcher/variant_generator.py index fb0c8ed0e..9d7decc0a 100644 --- a/flaml/tune/searcher/variant_generator.py +++ b/flaml/tune/searcher/variant_generator.py @@ -282,9 +282,9 @@ def _split_resolved_unresolved_values( _resolved_children, _unresolved_children, ) = _split_resolved_unresolved_values(v) - for (path, value) in _resolved_children.items(): + for path, value in _resolved_children.items(): resolved_vars[(k,) + path] = value - for (path, value) in _unresolved_children.items(): + for path, value in _unresolved_children.items(): unresolved_vars[(k,) + path] = value elif isinstance(v, list): # Recurse into a list @@ -293,9 +293,9 @@ def _split_resolved_unresolved_values( _resolved_children, _unresolved_children, ) = _split_resolved_unresolved_values({i: elem}) - for (path, value) in _resolved_children.items(): + for path, value in _resolved_children.items(): resolved_vars[(k,) + path] = value - for (path, value) in _unresolved_children.items(): + for path, value in _unresolved_children.items(): unresolved_vars[(k,) + path] = value else: resolved_vars[(k,)] = v diff --git a/flaml/tune/tune.py b/flaml/tune/tune.py index a6046cd83..76921f199 100644 --- a/flaml/tune/tune.py +++ b/flaml/tune/tune.py @@ -142,7 +142,6 @@ class ExperimentAnalysis(EA): def report(_metric=None, **kwargs): - """A function called by the HPO application to report final or intermediate results. diff --git a/test/automl/test_multiclass.py b/test/automl/test_multiclass.py index b9f7b177a..3b6f70732 100644 --- a/test/automl/test_multiclass.py +++ b/test/automl/test_multiclass.py @@ -11,7 +11,6 @@ from flaml.automl.training_log import training_log_reader class MyRegularizedGreedyForest(SKLearnEstimator): def __init__(self, task="binary", **config): - super().__init__(task, **config) if task in CLASSIFICATION: diff --git a/test/automl/test_python_log.py b/test/automl/test_python_log.py index 720e48665..d1e38fa78 100644 --- a/test/automl/test_python_log.py +++ b/test/automl/test_python_log.py @@ -10,11 +10,9 @@ import io class TestLogging(unittest.TestCase): def test_logging_level(self): - from flaml import logger, logger_formatter with tempfile.TemporaryDirectory() as d: - training_log = os.path.join(d, "training.log") # Configure logging for the FLAML logger diff --git a/test/automl/test_training_log.py b/test/automl/test_training_log.py index d8949e6d1..8d15bacf0 100644 --- a/test/automl/test_training_log.py +++ b/test/automl/test_training_log.py @@ -12,7 +12,6 @@ class TestTrainingLog(unittest.TestCase): def test_training_log( self, path="test_training_log.log", estimator_list="auto", use_ray=False ): - with TemporaryDirectory() as d: filename = os.path.join(d, path) diff --git a/test/nlp/utils.py b/test/nlp/utils.py index f8536b960..6d133a08c 100644 --- a/test/nlp/utils.py +++ b/test/nlp/utils.py @@ -1574,7 +1574,6 @@ def get_toy_data_tokenclassification_tokenlabel(): def get_automl_settings(estimator_name="transformer"): - automl_settings = { "gpu_per_trial": 0, "max_iter": 3, diff --git a/test/test_autovw.py b/test/test_autovw.py index f75e527af..59f7b9943 100644 --- a/test/test_autovw.py +++ b/test/test_autovw.py @@ -101,7 +101,7 @@ def get_oml_to_vw(did, max_ns_num, ds_dir=VW_DS_DIR): target_attribute = ds.default_target_attribute # if target_attribute is None and did in OML_target_attribute_dict: # target_attribute = OML_target_attribute_dict[did] - except (SSLError) as e: + except SSLError as e: print(e) return diff --git a/test/tune/test_record_incumbent.py b/test/tune/test_record_incumbent.py index 9cec3c1da..fdf5bb5e7 100644 --- a/test/tune/test_record_incumbent.py +++ b/test/tune/test_record_incumbent.py @@ -17,7 +17,6 @@ def rosenbrock_function(config: dict): def test_record_incumbent(method="BlendSearch"): - if method != "CFOCat": search_space = { "x1": tune.randint(1, 9), diff --git a/test/tune/test_reproducibility.py b/test/tune/test_reproducibility.py index 0c68a9136..8b3c867a5 100644 --- a/test/tune/test_reproducibility.py +++ b/test/tune/test_reproducibility.py @@ -35,7 +35,6 @@ def test_tune(externally_setup_searcher=False, use_ray=False, use_raytune=False) "height": tune.uniform(-100, 100), } if externally_setup_searcher is True: - searcher = BlendSearch( space=search_space, time_budget_s=5,