variable name (#187)

Chi Wang 2021-09-04 20:28:37 -07:00 committed by GitHub
parent e46573a01d
commit 339eb80f44
12 changed files with 45 additions and 2 deletions

View File

@@ -366,7 +366,7 @@ class AutoML:
@property
def classes_(self):
'''A list of n_classes elements for class labels.'''
attr = getattr(self, "label_transformer", None)
attr = getattr(self, "_label_transformer", None)
if attr:
return attr.classes_.tolist()
attr = getattr(self, "_trained_estimator", None)

View File

@@ -87,6 +87,7 @@ class AutoTransformers:
@staticmethod
def _get_split_name(data_raw, fold_name=None):
# TODO coverage
if fold_name:
return fold_name
fold_keys = data_raw.keys()
@@ -280,6 +281,7 @@ class AutoTransformers:
model_config = _set_model_config()
if is_pretrained_model_in_classification_head_list():
# TODO coverage
if self._num_labels != num_labels_old:
this_model = get_this_model()
model_config.num_labels = self._num_labels
@@ -295,6 +297,7 @@ class AutoTransformers:
this_model.resize_token_embeddings(len(self._tokenizer))
return this_model
elif this_task == "regression":
# TODO add test
model_config_num_labels = 1
model_config = _set_model_config()
this_model = get_this_model()
@@ -304,6 +307,7 @@ class AutoTransformers:
data_name = JobID.dataset_list_to_str(self.jobid_config.dat)
if data_name in ("glue", "super_glue"):
metric = datasets.load.load_metric(data_name, self.jobid_config.subdat)
# TODO delete
elif data_name in ("squad", "squad_v2"):
metric = datasets.load.load_metric(data_name)
else:
@@ -312,6 +316,7 @@ class AutoTransformers:
def _compute_metrics_by_dataset_name(self,
eval_pred):
# TODO coverage
predictions, labels = eval_pred
predictions = np.squeeze(predictions) \
if self.task_name == "regression" else np.argmax(predictions, axis=1)
@@ -321,6 +326,7 @@ class AutoTransformers:
def _compute_checkpoint_freq(self,
num_train_epochs,
batch_size):
# TODO coverage
if "gpu" in self._resources_per_trial:
ckpt_step_freq = int(min(num_train_epochs, 1) * len(self.train_dataset) / batch_size
/ self._resources_per_trial["gpu"] / self.ckpt_per_epoch) + 1
@@ -544,6 +550,7 @@ class AutoTransformers:
_fp16=True,
**custom_hpo_args
):
# TODO remove?
from transformers.trainer_utils import HPSearchBackend
'''Fine-tuning the huggingface model using HF's API Transformers.hyperparameter_search (for comparative purposes).
@@ -657,6 +664,7 @@ class AutoTransformers:
return validation_metric
def _set_transformers_verbosity(self, transformers_verbose):
# TODO coverage
if transformers_verbose == transformers.logging.ERROR:
transformers.logging.set_verbosity_error()
elif transformers_verbose == transformers.logging.WARNING:
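
As a rough worked example of the checkpoint-frequency formula in _compute_checkpoint_freq above (every number below is invented):

num_train_epochs = 3
train_dataset_len = 8000   # stand-in for len(self.train_dataset)
batch_size = 32
gpus_per_trial = 1         # stand-in for self._resources_per_trial["gpu"]
ckpt_per_epoch = 2         # stand-in for self.ckpt_per_epoch

ckpt_step_freq = int(min(num_train_epochs, 1) * train_dataset_len / batch_size
                     / gpus_per_trial / ckpt_per_epoch) + 1
print(ckpt_step_freq)  # 126, i.e. a checkpoint every 126 optimizer steps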

View File

@@ -77,6 +77,7 @@ def tokenize_superglue_wic(this_example,
try:
padding_direction = this_tokenizer.padding_side
if padding_direction == "left":
# TODO coverage
padding_id = input_ids_sepp[0]
while input_ids_sepp[ptr_sepp] == padding_id:
ptr_sepp += 1
@@ -103,6 +104,7 @@ def tokenize_superglue_wic(this_example,
which_sepp += 1
ptr_sepp += 1
else:
# TODO coverage
ptr_sepp += 1
"""
max_word_span is the maximum number of tokens in the word
@@ -131,6 +133,7 @@ def tokenize_glue(this_example,
if len(sentence_keys) > 1:
sentence1_key, sentence2_key = sentence_keys[0], sentence_keys[1]
else:
# TODO coverage
sentence1_key = sentence_keys[0]
sentence2_key = None
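
The left/right branches above key on the tokenizer's padding_side attribute; a quick way to inspect it (the model name is only an example and requires the transformers package):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
print(tokenizer.padding_side)  # "right" for BERT-style models, "left" for some decoder-only models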

View File

@@ -64,6 +64,7 @@ def get_default_and_alternative_metric(dataset_name_list: typing.List,
return default_metric, default_mode, all_metrics, all_mode
else:
# TODO coverage
assert isinstance(eval_name_mapping, list), "dataset_name and subdataset_name not correctly specified"
default_metric, default_mode = eval_name_mapping[0]

View File

@@ -59,6 +59,7 @@ def output_prediction_glue(output_path, zip_file_name, predictions, train_data,
if subdataset_name != "mnli":
is_match = subdataset_name == each_subdataset_name
else:
# TODO coverage
if dev_name == "validation_matched":
is_match = each_file == "MNLI-m.tsv"
else:
@@ -68,11 +69,13 @@ def output_prediction_glue(output_path, zip_file_name, predictions, train_data,
writer.write("index\tprediction\n")
for index, item in enumerate(predictions):
if subdataset_name == "stsb":
# TODO coverage
if item > 5.0:
item = 5.0
writer.write(f"{index}\t{item:3.3f}\n")
else:
if subdataset_name in ("rte", "qnli", "mnli"):
# TODO coverage
item = label_list[item]
writer.write(f"{index}\t{item}\n")
else:
@@ -80,6 +83,7 @@ def output_prediction_glue(output_path, zip_file_name, predictions, train_data,
item = int(item)
writer.write(f"{index}\t{item}\n")
else:
# TODO coverage
writer.write(f"{index}\t{item:3.3f}\n")
shutil.make_archive(os.path.join(output_path, zip_file_name), 'zip', output_dir)
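
A minimal sketch of the submission-file pattern above, using the stsb branch (the prediction values are made up; the real code also zips the output directory afterwards):

predictions = [5.31, 2.04, 4.97]   # regression outputs for STS-B
with open("STS-B.tsv", "w") as writer:
    writer.write("index\tprediction\n")
    for index, item in enumerate(predictions):
        if item > 5.0:
            item = 5.0             # STS-B similarity scores are capped at 5.0
        writer.write(f"{index}\t{item:3.3f}\n")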

View File

@@ -42,6 +42,7 @@ def get_default_task(dataset_name_list: list, subdataset_name=None):
"dataset_name and subdataset_name not correctly specified"
default_task = eval_name_mapping[subdataset_name]
else:
# TODO coverage
assert isinstance(eval_name_mapping, list), "dataset_name and subdataset_name not correctly specified"
default_task = eval_name_mapping
return default_task

View File

@@ -33,8 +33,10 @@ def bounded_gridunion(model_type=None,
if "u" in custom_hpo_args["bound"][each_key]:
upper = custom_hpo_args["bound"][each_key]["u"]
else:
# TODO coverage
upper = 100000
if "l" in custom_hpo_args["bound"][each_key]:
# TODO coverage
lower = custom_hpo_args["bound"][each_key]["l"]
else:
lower = -100000
@@ -42,6 +44,7 @@ def bounded_gridunion(model_type=None,
upper_id = len(original_space)
for x in range(len(original_space)):
if original_space[x] > upper:
# TODO coverage
upper_id = x
break
lower_id = 0
@@ -121,6 +124,7 @@ def hpo_space_generic_grid(model_type=None,
subdataset_name=None,
algo_mode=None,
**custom_hpo_args):
# TODO coverage
output_config = {
"learning_rate": [1e-5, 2e-5, 3e-5, 4e-5, 5e-5, 1e-4, 1.5e-4],
"num_train_epochs": [3, 10],
@@ -137,6 +141,7 @@ def hpo_space_small(model_type=None,
subdataset_name=None,
algo_mode=None,
**custom_hpo_args):
# TODO coverage
config_json = AutoGridSearchSpace.from_model_and_dataset_name(
model_type, model_size_type, dataset_name_list, subdataset_name, "hpo")
output_config = {}
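
A standalone illustration of the upper-bound cut in bounded_gridunion above (the grid and the bound are invented values):

original_space = [1e-5, 2e-5, 3e-5, 5e-5, 1e-4]
upper = 5e-5

upper_id = len(original_space)
for x in range(len(original_space)):
    if original_space[x] > upper:
        upper_id = x
        break
print(original_space[:upper_id])  # [1e-05, 2e-05, 3e-05, 5e-05]; values above the bound are dropped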

View File

@@ -70,9 +70,11 @@ class AutoSearchAlgorithm:
assert hpo_search_space, "hpo_search_space needs to be specified for calling AutoSearchAlgorithm.from_method_name"
if not search_algo_name:
# TODO coverage
search_algo_name = "grid"
if search_algo_name in SEARCH_ALGO_MAPPING.keys():
if SEARCH_ALGO_MAPPING[search_algo_name] is None:
# TODO coverage
return None
"""
filtering the customized args for hpo from custom_hpo_args, keep those
@@ -91,6 +93,7 @@ class AutoSearchAlgorithm:
: max(hpo_search_space["per_device_train_batch_size"].categories)},
"""
if search_algo_args_mode == "dft":
# TODO coverage
this_search_algo_kwargs = DEFAULT_SEARCH_ALGO_ARGS_MAPPING[search_algo_name](
"dft",
metric_name,
@@ -121,6 +124,7 @@ class AutoSearchAlgorithm:
@staticmethod
def grid2list(grid_config):
# TODO coverage
key_val_list = [[(key, each_val) for each_val in val_list['grid_search']]
for (key, val_list) in grid_config.items()]
config_list = [dict(x) for x in itertools.product(*key_val_list)]
@@ -132,6 +136,7 @@ def get_search_algo_args_optuna(search_args_mode,
metric_mode_name,
hpo_search_space=None,
**custom_hpo_args):
# TODO coverage
return {}
@@ -145,6 +150,7 @@ def default_search_algo_args_bs(search_args_mode,
isinstance(hpo_search_space["num_train_epochs"], ray.tune.sample.Categorical):
min_epoch = min(hpo_search_space["num_train_epochs"].categories)
else:
# TODO coverage
assert isinstance(hpo_search_space["num_train_epochs"], ray.tune.sample.Float)
min_epoch = hpo_search_space["num_train_epochs"].lower
default_search_algo_args = {
@@ -166,6 +172,7 @@ def default_search_algo_args_grid_search(search_args_mode,
metric_mode_name,
hpo_search_space=None,
**custom_hpo_args):
# TODO coverage
return {}
@@ -174,6 +181,7 @@ def default_search_algo_args_random_search(search_args_mode,
metric_mode_name,
hpo_search_space=None,
**custom_hpo_args):
# TODO coverage
return {}
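
For reference, a self-contained version of the grid2list expansion above (the grid values are invented):

import itertools

grid_config = {
    "learning_rate": {"grid_search": [1e-5, 3e-5]},
    "num_train_epochs": {"grid_search": [3, 10]},
}
key_val_list = [[(key, each_val) for each_val in val_list["grid_search"]]
                for (key, val_list) in grid_config.items()]
config_list = [dict(x) for x in itertools.product(*key_val_list)]
print(len(config_list))  # 4 configurations, the cross product of the two grids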

View File

@@ -17,6 +17,7 @@ class TrainerForAutoTransformers(TFTrainer):
eval_dataset:
the dataset to be evaluated
"""
# TODO coverage
from ray import tune
eval_dataloader = self.get_eval_dataloader(eval_dataset)
@@ -38,6 +39,7 @@ class TrainerForAutoTransformers(TFTrainer):
Overriding transformers.Trainer.save_state. It is only through saving
the states that best_trial.get_best_checkpoint can return a non-empty value.
"""
# TODO coverage
import torch
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
from ray import tune
@@ -80,6 +82,7 @@ class TrainerForAutoTransformers(TFTrainer):
device_count=None):
if max_steps:
return int(warmup_ratio * max_steps)
# TODO coverage
max_steps = TrainerForAutoTransformers.convert_num_train_epochs_to_max_steps(
num_train_epochs,
num_train_examples,
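
A rough illustration of the warmup-step conversion above for the case where max_steps is given (the numbers are invented):

warmup_ratio = 0.06
max_steps = 5000
warmup_steps = int(warmup_ratio * max_steps)
print(warmup_steps)  # 300 warmup steps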

View File

@@ -360,6 +360,7 @@ class JobID:
print("console_args has no attribute {}, continue".format(each_key))
continue
if self.mod == "grid":
# TODO coverage
self.alg = "grid"
@@ -447,6 +448,7 @@ class AzureUtils:
if autohf is not None:
self.jobid = autohf.jobid_config
else:
# TODO coverage
assert jobid_config is not None, "jobid_config must be passed either through autohf.jobid_config" \
" or jobid_config"
self.jobid = jobid_config
@@ -527,6 +529,7 @@ class AzureUtils:
print("Your output will not be synced to azure because azure-blob-storage is not installed")
def download_azure_blob(self, blobname):
# TODO coverage
blob_client = self._init_blob_client(blobname)
if blob_client:
pathlib.Path(re.search("(?P<parent_path>^.*)/[^/]+$", blobname).group("parent_path")).mkdir(
@@ -549,6 +552,7 @@ class AzureUtils:
metric_score = each_trial.metric_analysis["eval_" + analysis.default_metric]
time_stamp = each_trial.metric_analysis['timestamp']
except KeyError:
# TODO coverage
print("KeyError, {} does not contain the key {} or {}".format("each_trial.metric_analysis",
"eval_" + analysis.default_metric,
"timestamp"))
@@ -608,6 +612,7 @@ class AzureUtils:
"""
azure_save_file_name = local_json_file.split("/")[-1][:-5]
if self.data_root_dir is None:
# TODO coverage
from ..utils import load_dft_args
console_args = load_dft_args()
output_dir = getattr(console_args, "data_root_dir")
@@ -622,6 +627,7 @@ class AzureUtils:
@staticmethod
def is_after_earliest_time(this_blob, earliest_time: Tuple[int, int, int]):
# TODO coverage
import pytz
utc = pytz.UTC
if this_blob.last_modified >= utc.localize(datetime(earliest_time[0], earliest_time[1], earliest_time[2])):
@@ -639,6 +645,7 @@ class AzureUtils:
container_client = self._init_azure_clients()
if container_client:
for each_blob in container_client.list_blobs():
# TODO coverage
if each_blob.name.startswith(root_log_path):
each_jobconfig = JobID.convert_blobname_to_jobid(each_blob.name)
is_append = False
@@ -701,6 +708,7 @@ class AzureUtils:
"""
matched_config_score_lists = []
for (each_jobconfig, each_blob) in matched_blob_list:
# TODO coverage
self.download_azure_blob(each_blob.name)
data_json = json.load(open(each_blob.name, "r"))
each_config_and_score_list = ConfigScoreList(
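
A small sketch of the parent-directory extraction used by download_azure_blob above (the blob name and the mkdir flags are assumptions):

import pathlib
import re

blobname = "logs/glue/mrpc/result.json"
parent_path = re.search("(?P<parent_path>^.*)/[^/]+$", blobname).group("parent_path")
pathlib.Path(parent_path).mkdir(parents=True, exist_ok=True)
print(parent_path)  # logs/glue/mrpc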

View File

@@ -35,6 +35,7 @@ class WandbUtils:
os.environ["WANDB_API_KEY"] = wandb_key
os.environ["WANDB_MODE"] = "online"
else:
# TODO coverage
os.environ["WANDB_MODE"] = "disabled"
self.jobid_config = jobid_config
@@ -53,6 +54,7 @@ class WandbUtils:
return ""
def set_wandb_per_trial(self):
# TODO coverage
print("before wandb.init\n\n\n")
try:
import wandb
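
A minimal sketch of the environment-variable toggle above (how the key is obtained here is a placeholder):

import os

wandb_key = os.environ.get("WANDB_KEY", "")  # placeholder for however the key is supplied
if wandb_key:
    os.environ["WANDB_API_KEY"] = wandb_key
    os.environ["WANDB_MODE"] = "online"
else:
    os.environ["WANDB_MODE"] = "disabled"    # run without syncing to W&B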

View File

@@ -1 +1 @@
__version__ = "0.6.1"
__version__ = "0.6.2"