variable name (#187)

Chi Wang 2021-09-04 20:28:37 -07:00 committed by GitHub
parent e46573a01d
commit 339eb80f44
12 changed files with 45 additions and 2 deletions

View File

@@ -366,7 +366,7 @@ class AutoML:
@property
def classes_(self):
'''A list of n_classes elements for class labels.'''
attr = getattr(self, "label_transformer", None)
attr = getattr(self, "_label_transformer", None)
if attr:
return attr.classes_.tolist()
attr = getattr(self, "_trained_estimator", None)

View File

@@ -87,6 +87,7 @@ class AutoTransformers:
@staticmethod
def _get_split_name(data_raw, fold_name=None):
# TODO coverage
if fold_name:
return fold_name
fold_keys = data_raw.keys()
@@ -280,6 +281,7 @@ class AutoTransformers:
model_config = _set_model_config()
if is_pretrained_model_in_classification_head_list():
# TODO coverage
if self._num_labels != num_labels_old:
this_model = get_this_model()
model_config.num_labels = self._num_labels
@@ -295,6 +297,7 @@ class AutoTransformers:
this_model.resize_token_embeddings(len(self._tokenizer))
return this_model
elif this_task == "regression":
# TODO add test
model_config_num_labels = 1
model_config = _set_model_config()
this_model = get_this_model()
@@ -304,6 +307,7 @@ class AutoTransformers:
data_name = JobID.dataset_list_to_str(self.jobid_config.dat)
if data_name in ("glue", "super_glue"):
metric = datasets.load.load_metric(data_name, self.jobid_config.subdat)
# TODO delete
elif data_name in ("squad", "squad_v2"):
metric = datasets.load.load_metric(data_name)
else:
@@ -312,6 +316,7 @@ class AutoTransformers:
def _compute_metrics_by_dataset_name(self,
eval_pred):
# TODO coverage
predictions, labels = eval_pred
predictions = np.squeeze(predictions) \
if self.task_name == "regression" else np.argmax(predictions, axis=1)
@@ -321,6 +326,7 @@ class AutoTransformers:
def _compute_checkpoint_freq(self,
num_train_epochs,
batch_size):
# TODO coverage
if "gpu" in self._resources_per_trial:
ckpt_step_freq = int(min(num_train_epochs, 1) * len(self.train_dataset) / batch_size
/ self._resources_per_trial["gpu"] / self.ckpt_per_epoch) + 1
@@ -544,6 +550,7 @@ class AutoTransformers:
_fp16=True,
**custom_hpo_args
):
# TODO remove?
from transformers.trainer_utils import HPSearchBackend
'''Fine-tuning the huggingface model using HF's API Transformers.hyperparameter_search (for comparative purposes).
@@ -657,6 +664,7 @@ class AutoTransformers:
return validation_metric
def _set_transformers_verbosity(self, transformers_verbose):
# TODO coverage
if transformers_verbose == transformers.logging.ERROR:
transformers.logging.set_verbosity_error()
elif transformers_verbose == transformers.logging.WARNING:
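
As a rough worked example of the checkpoint-frequency formula in _compute_checkpoint_freq above (every number below is invented):

num_train_epochs = 3
train_dataset_len = 8000   # stand-in for len(self.train_dataset)
batch_size = 32
gpus_per_trial = 1         # stand-in for self._resources_per_trial["gpu"]
ckpt_per_epoch = 2         # stand-in for self.ckpt_per_epoch

ckpt_step_freq = int(min(num_train_epochs, 1) * train_dataset_len / batch_size
                     / gpus_per_trial / ckpt_per_epoch) + 1
print(ckpt_step_freq)  # 126, i.e. a checkpoint every 126 optimizer steps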

View File

@@ -77,6 +77,7 @@ def tokenize_superglue_wic(this_example,
try:
padding_direction = this_tokenizer.padding_side
if padding_direction == "left":
# TODO coverage
padding_id = input_ids_sepp[0]
while input_ids_sepp[ptr_sepp] == padding_id:
ptr_sepp += 1
@@ -103,6 +104,7 @@ def tokenize_superglue_wic(this_example,
which_sepp += 1
ptr_sepp += 1
else:
# TODO coverage
ptr_sepp += 1
"""
max_word_span is the maximum number of tokens in the word
@@ -131,6 +133,7 @@ def tokenize_glue(this_example,
if len(sentence_keys) > 1:
sentence1_key, sentence2_key = sentence_keys[0], sentence_keys[1]
else:
# TODO coverage
sentence1_key = sentence_keys[0]
sentence2_key = None
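
The left/right branches above key on the tokenizer's padding_side attribute; a quick way to inspect it (the model name is only an example and requires the transformers package):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
print(tokenizer.padding_side)  # "right" for BERT-style models, "left" for some decoder-only models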

View File

@@ -64,6 +64,7 @@ def get_default_and_alternative_metric(dataset_name_list: typing.List,
return default_metric, default_mode, all_metrics, all_mode
else:
# TODO coverage
assert isinstance(eval_name_mapping, list), "dataset_name and subdataset_name not correctly specified"
default_metric, default_mode = eval_name_mapping[0]

View File

@@ -59,6 +59,7 @@ def output_prediction_glue(output_path, zip_file_name, predictions, train_data,
if subdataset_name != "mnli":
is_match = subdataset_name == each_subdataset_name
else:
# TODO coverage
if dev_name == "validation_matched":
is_match = each_file == "MNLI-m.tsv"
else:
@@ -68,11 +69,13 @@ def output_prediction_glue(output_path, zip_file_name, predictions, train_data,
writer.write("index\tprediction\n")
for index, item in enumerate(predictions):
if subdataset_name == "stsb":
# TODO coverage
if item > 5.0:
item = 5.0
writer.write(f"{index}\t{item:3.3f}\n")
else:
if subdataset_name in ("rte", "qnli", "mnli"):
# TODO coverage
item = label_list[item]
writer.write(f"{index}\t{item}\n")
else:
@@ -80,6 +83,7 @@ def output_prediction_glue(output_path, zip_file_name, predictions, train_data,
item = int(item)
writer.write(f"{index}\t{item}\n")
else:
# TODO coverage
writer.write(f"{index}\t{item:3.3f}\n")
shutil.make_archive(os.path.join(output_path, zip_file_name), 'zip', output_dir)
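
A minimal sketch of the submission-file pattern above, using the stsb branch (the prediction values are made up; the real code also zips the output directory afterwards):

predictions = [5.31, 2.04, 4.97]   # regression outputs for STS-B
with open("STS-B.tsv", "w") as writer:
    writer.write("index\tprediction\n")
    for index, item in enumerate(predictions):
        if item > 5.0:
            item = 5.0             # STS-B similarity scores are capped at 5.0
        writer.write(f"{index}\t{item:3.3f}\n")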

View File

@@ -42,6 +42,7 @@ def get_default_task(dataset_name_list: list, subdataset_name=None):
"dataset_name and subdataset_name not correctly specified"
default_task = eval_name_mapping[subdataset_name]
else:
# TODO coverage
assert isinstance(eval_name_mapping, list), "dataset_name and subdataset_name not correctly specified"
default_task = eval_name_mapping
return default_task

View File

@@ -33,8 +33,10 @@ def bounded_gridunion(model_type=None,
if "u" in custom_hpo_args["bound"][each_key]:
upper = custom_hpo_args["bound"][each_key]["u"]
else:
# TODO coverage
upper = 100000
if "l" in custom_hpo_args["bound"][each_key]:
# TODO coverage
lower = custom_hpo_args["bound"][each_key]["l"]
else:
lower = -100000
@@ -42,6 +44,7 @@ def bounded_gridunion(model_type=None,
upper_id = len(original_space)
for x in range(len(original_space)):
if original_space[x] > upper:
# TODO coverage
upper_id = x
break
lower_id = 0
@@ -121,6 +124,7 @@ def hpo_space_generic_grid(model_type=None,
subdataset_name=None,
algo_mode=None,
**custom_hpo_args):
# TODO coverage
output_config = {
"learning_rate": [1e-5, 2e-5, 3e-5, 4e-5, 5e-5, 1e-4, 1.5e-4],
"num_train_epochs": [3, 10],
@@ -137,6 +141,7 @@ def hpo_space_small(model_type=None,
subdataset_name=None,
algo_mode=None,
**custom_hpo_args):
# TODO coverage
config_json = AutoGridSearchSpace.from_model_and_dataset_name(
model_type, model_size_type, dataset_name_list, subdataset_name, "hpo")
output_config = {}
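
A standalone illustration of the upper-bound cut in bounded_gridunion above (the grid and the bound are invented values):

original_space = [1e-5, 2e-5, 3e-5, 5e-5, 1e-4]
upper = 5e-5

upper_id = len(original_space)
for x in range(len(original_space)):
    if original_space[x] > upper:
        upper_id = x
        break
print(original_space[:upper_id])  # [1e-05, 2e-05, 3e-05, 5e-05]; values above the bound are dropped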

View File

@@ -70,9 +70,11 @@ class AutoSearchAlgorithm:
assert hpo_search_space, "hpo_search_space needs to be specified for calling AutoSearchAlgorithm.from_method_name"
if not search_algo_name:
# TODO coverage
search_algo_name = "grid"
if search_algo_name in SEARCH_ALGO_MAPPING.keys():
if SEARCH_ALGO_MAPPING[search_algo_name] is None:
# TODO coverage
return None
"""
filtering the customized args for hpo from custom_hpo_args, keep those
@@ -91,6 +93,7 @@ class AutoSearchAlgorithm:
: max(hpo_search_space["per_device_train_batch_size"].categories)},
"""
if search_algo_args_mode == "dft":
# TODO coverage
this_search_algo_kwargs = DEFAULT_SEARCH_ALGO_ARGS_MAPPING[search_algo_name](
"dft",
metric_name,
@@ -121,6 +124,7 @@ class AutoSearchAlgorithm:
@staticmethod
def grid2list(grid_config):
# TODO coverage
key_val_list = [[(key, each_val) for each_val in val_list['grid_search']]
for (key, val_list) in grid_config.items()]
config_list = [dict(x) for x in itertools.product(*key_val_list)]
@@ -132,6 +136,7 @@ def get_search_algo_args_optuna(search_args_mode,
metric_mode_name,
hpo_search_space=None,
**custom_hpo_args):
# TODO coverage
return {}
@@ -145,6 +150,7 @@ def default_search_algo_args_bs(search_args_mode,
isinstance(hpo_search_space["num_train_epochs"], ray.tune.sample.Categorical):
min_epoch = min(hpo_search_space["num_train_epochs"].categories)
else:
# TODO coverage
assert isinstance(hpo_search_space["num_train_epochs"], ray.tune.sample.Float)
min_epoch = hpo_search_space["num_train_epochs"].lower
default_search_algo_args = {
@@ -166,6 +172,7 @@ def default_search_algo_args_grid_search(search_args_mode,
metric_mode_name,
hpo_search_space=None,
**custom_hpo_args):
# TODO coverage
return {}
@@ -174,6 +181,7 @@ def default_search_algo_args_random_search(search_args_mode,
metric_mode_name,
hpo_search_space=None,
**custom_hpo_args):
# TODO coverage
return {}
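
For reference, a self-contained version of the grid2list expansion above (the grid values are invented):

import itertools

grid_config = {
    "learning_rate": {"grid_search": [1e-5, 3e-5]},
    "num_train_epochs": {"grid_search": [3, 10]},
}
key_val_list = [[(key, each_val) for each_val in val_list["grid_search"]]
                for (key, val_list) in grid_config.items()]
config_list = [dict(x) for x in itertools.product(*key_val_list)]
print(len(config_list))  # 4 configurations, the cross product of the two grids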

View File

@@ -17,6 +17,7 @@ class TrainerForAutoTransformers(TFTrainer):
eval_dataset:
the dataset to be evaluated
"""
# TODO coverage
from ray import tune
eval_dataloader = self.get_eval_dataloader(eval_dataset)
@@ -38,6 +39,7 @@ class TrainerForAutoTransformers(TFTrainer):
Overriding transformers.Trainer.save_state. It is only through saving
the states that best_trial.get_best_checkpoint can return a non-empty value.
"""
# TODO coverage
import torch
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
from ray import tune
@@ -80,6 +82,7 @@ class TrainerForAutoTransformers(TFTrainer):
device_count=None):
if max_steps:
return int(warmup_ratio * max_steps)
# TODO coverage
max_steps = TrainerForAutoTransformers.convert_num_train_epochs_to_max_steps(
num_train_epochs,
num_train_examples,
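
A rough illustration of the warmup-step conversion above for the case where max_steps is given (the numbers are invented):

warmup_ratio = 0.06
max_steps = 5000
warmup_steps = int(warmup_ratio * max_steps)
print(warmup_steps)  # 300 warmup steps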

View File

@@ -360,6 +360,7 @@ class JobID:
print("console_args has no attribute {}, continue".format(each_key))
continue
if self.mod == "grid":
# TODO coverage
self.alg = "grid"
@@ -447,6 +448,7 @@ class AzureUtils:
if autohf is not None:
self.jobid = autohf.jobid_config
else:
# TODO coverage
assert jobid_config is not None, "jobid_config must be passed either through autohf.jobid_config" \
" or jobid_config"
self.jobid = jobid_config
@@ -527,6 +529,7 @@ class AzureUtils:
print("Your output will not be synced to azure because azure-blob-storage is not installed")
def download_azure_blob(self, blobname):
# TODO coverage
blob_client = self._init_blob_client(blobname)
if blob_client:
pathlib.Path(re.search("(?P<parent_path>^.*)/[^/]+$", blobname).group("parent_path")).mkdir(
@@ -549,6 +552,7 @@ class AzureUtils:
metric_score = each_trial.metric_analysis["eval_" + analysis.default_metric]
time_stamp = each_trial.metric_analysis['timestamp']
except KeyError:
# TODO coverage
print("KeyError, {} does not contain the key {} or {}".format("each_trial.metric_analysis",
"eval_" + analysis.default_metric,
"timestamp"))
@@ -608,6 +612,7 @@ class AzureUtils:
"""
azure_save_file_name = local_json_file.split("/")[-1][:-5]
if self.data_root_dir is None:
# TODO coverage
from ..utils import load_dft_args
console_args = load_dft_args()
output_dir = getattr(console_args, "data_root_dir")
@@ -622,6 +627,7 @@ class AzureUtils:
@staticmethod
def is_after_earliest_time(this_blob, earliest_time: Tuple[int, int, int]):
# TODO coverage
import pytz
utc = pytz.UTC
if this_blob.last_modified >= utc.localize(datetime(earliest_time[0], earliest_time[1], earliest_time[2])):
@@ -639,6 +645,7 @@ class AzureUtils:
container_client = self._init_azure_clients()
if container_client:
for each_blob in container_client.list_blobs():
# TODO coverage
if each_blob.name.startswith(root_log_path):
each_jobconfig = JobID.convert_blobname_to_jobid(each_blob.name)
is_append = False
@@ -701,6 +708,7 @@ class AzureUtils:
"""
matched_config_score_lists = []
for (each_jobconfig, each_blob) in matched_blob_list:
# TODO coverage
self.download_azure_blob(each_blob.name)
data_json = json.load(open(each_blob.name, "r"))
each_config_and_score_list = ConfigScoreList(
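
A small sketch of the parent-directory extraction used by download_azure_blob above (the blob name and the mkdir flags are assumptions):

import pathlib
import re

blobname = "logs/glue/mrpc/result.json"
parent_path = re.search("(?P<parent_path>^.*)/[^/]+$", blobname).group("parent_path")
pathlib.Path(parent_path).mkdir(parents=True, exist_ok=True)
print(parent_path)  # logs/glue/mrpc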

View File

@@ -35,6 +35,7 @@ class WandbUtils:
os.environ["WANDB_API_KEY"] = wandb_key
os.environ["WANDB_MODE"] = "online"
else:
# TODO coverage
os.environ["WANDB_MODE"] = "disabled"
self.jobid_config = jobid_config
@@ -53,6 +54,7 @@ class WandbUtils:
return ""
def set_wandb_per_trial(self):
# TODO coverage
print("before wandb.init\n\n\n")
try:
import wandb
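
A minimal sketch of the environment-variable toggle above (how the key is obtained here is a placeholder):

import os

wandb_key = os.environ.get("WANDB_KEY", "")  # placeholder for however the key is supplied
if wandb_key:
    os.environ["WANDB_API_KEY"] = wandb_key
    os.environ["WANDB_MODE"] = "online"
else:
    os.environ["WANDB_MODE"] = "disabled"    # run without syncing to W&B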

View File

@@ -1 +1 @@
__version__ = "0.6.1"
__version__ = "0.6.2"