variable name (#187)
This commit is contained in:
parent e46573a01d
commit 339eb80f44
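Apart from the version bump to 0.6.2, the hunks below either add a "# TODO" marker or rename the attribute read by AutoML.classes_ from "label_transformer" to the private "_label_transformer". A minimal standalone sketch of the renamed lookup (hypothetical AutoMLSketch class, assuming the label transformer behaves like sklearn's LabelEncoder; not the library's actual implementation):

    # Hypothetical reproduction of the attribute lookup in AutoML.classes_
    # after this commit; the real class does much more.
    from sklearn.preprocessing import LabelEncoder

    class AutoMLSketch:
        def fit(self, y):
            # The fitted encoder is stored on a private attribute.
            self._label_transformer = LabelEncoder().fit(y)

        @property
        def classes_(self):
            '''A list of n_classes elements for class labels.'''
            attr = getattr(self, "_label_transformer", None)
            if attr:
                return attr.classes_.tolist()
            return None

    automl = AutoMLSketch()
    automl.fit(["ham", "spam", "ham"])
    print(automl.classes_)  # ['ham', 'spam']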
@@ -366,7 +366,7 @@ class AutoML:
     @property
     def classes_(self):
         '''A list of n_classes elements for class labels.'''
-        attr = getattr(self, "label_transformer", None)
+        attr = getattr(self, "_label_transformer", None)
         if attr:
             return attr.classes_.tolist()
         attr = getattr(self, "_trained_estimator", None)

@@ -87,6 +87,7 @@ class AutoTransformers:

     @staticmethod
     def _get_split_name(data_raw, fold_name=None):
+        # TODO coverage
         if fold_name:
             return fold_name
         fold_keys = data_raw.keys()

@@ -280,6 +281,7 @@ class AutoTransformers:
             model_config = _set_model_config()

             if is_pretrained_model_in_classification_head_list():
+                # TODO coverage
                 if self._num_labels != num_labels_old:
                     this_model = get_this_model()
                     model_config.num_labels = self._num_labels

@@ -295,6 +297,7 @@ class AutoTransformers:
                 this_model.resize_token_embeddings(len(self._tokenizer))
                 return this_model
         elif this_task == "regression":
+            # TODO add test
             model_config_num_labels = 1
             model_config = _set_model_config()
             this_model = get_this_model()

@@ -304,6 +307,7 @@ class AutoTransformers:
         data_name = JobID.dataset_list_to_str(self.jobid_config.dat)
         if data_name in ("glue", "super_glue"):
             metric = datasets.load.load_metric(data_name, self.jobid_config.subdat)
+        # TODO delete
         elif data_name in ("squad", "squad_v2"):
             metric = datasets.load.load_metric(data_name)
         else:

@@ -312,6 +316,7 @@ class AutoTransformers:

     def _compute_metrics_by_dataset_name(self,
                                          eval_pred):
+        # TODO coverage
         predictions, labels = eval_pred
         predictions = np.squeeze(predictions) \
             if self.task_name == "regression" else np.argmax(predictions, axis=1)

@@ -321,6 +326,7 @@ class AutoTransformers:
     def _compute_checkpoint_freq(self,
                                  num_train_epochs,
                                  batch_size):
+        # TODO coverage
         if "gpu" in self._resources_per_trial:
             ckpt_step_freq = int(min(num_train_epochs, 1) * len(self.train_dataset) / batch_size
                                  / self._resources_per_trial["gpu"] / self.ckpt_per_epoch) + 1

@@ -544,6 +550,7 @@ class AutoTransformers:
                        _fp16=True,
                        **custom_hpo_args
                        ):
+        # TODO remove?
         from transformers.trainer_utils import HPSearchBackend

         '''Fine tuning the huggingface using HF's API Transformers.hyperparameter_search (for comparitive purpose).

@@ -657,6 +664,7 @@ class AutoTransformers:
         return validation_metric

     def _set_transformers_verbosity(self, transformers_verbose):
+        # TODO coverage
         if transformers_verbose == transformers.logging.ERROR:
             transformers.logging.set_verbosity_error()
         elif transformers_verbose == transformers.logging.WARNING:

@@ -77,6 +77,7 @@ def tokenize_superglue_wic(this_example,
     try:
         padding_direction = this_tokenizer.padding_side
         if padding_direction == "left":
+            # TODO coverage
             padding_id = input_ids_sepp[0]
             while input_ids_sepp[ptr_sepp] == padding_id:
                 ptr_sepp += 1

@@ -103,6 +104,7 @@ def tokenize_superglue_wic(this_example,
                 which_sepp += 1
             ptr_sepp += 1
         else:
+            # TODO coverage
             ptr_sepp += 1
     """
     max_word_span is the maximum tokens of the word

@@ -131,6 +133,7 @@ def tokenize_glue(this_example,
     if len(sentence_keys) > 1:
         sentence1_key, sentence2_key = sentence_keys[0], sentence_keys[1]
     else:
+        # TODO coverage
         sentence1_key = sentence_keys[0]
         sentence2_key = None

@@ -64,6 +64,7 @@ def get_default_and_alternative_metric(dataset_name_list: typing.List,

         return default_metric, default_mode, all_metrics, all_mode
     else:
+        # TODO coverage
         assert isinstance(eval_name_mapping, list), "dataset_name and subdataset_name not correctly specified"

         default_metric, default_mode = eval_name_mapping[0]

@@ -59,6 +59,7 @@ def output_prediction_glue(output_path, zip_file_name, predictions, train_data,
             if subdataset_name != "mnli":
                 is_match = subdataset_name == each_subdataset_name
             else:
+                # TODO coverage
                 if dev_name == "validation_matched":
                     is_match = each_file == "MNLI-m.tsv"
                 else:

@@ -68,11 +69,13 @@ def output_prediction_glue(output_path, zip_file_name, predictions, train_data,
         writer.write("index\tprediction\n")
         for index, item in enumerate(predictions):
             if subdataset_name == "stsb":
+                # TODO coverage
                 if item > 5.0:
                     item = 5.0
                 writer.write(f"{index}\t{item:3.3f}\n")
             else:
                 if subdataset_name in ("rte", "qnli", "mnli"):
+                    # TODO coverage
                     item = label_list[item]
                     writer.write(f"{index}\t{item}\n")
                 else:

@@ -80,6 +83,7 @@ def output_prediction_glue(output_path, zip_file_name, predictions, train_data,
                     item = int(item)
                     writer.write(f"{index}\t{item}\n")
                 else:
+                    # TODO coverage
                     writer.write(f"{index}\t{item:3.3f}\n")

     shutil.make_archive(os.path.join(output_path, zip_file_name), 'zip', output_dir)

@@ -42,6 +42,7 @@ def get_default_task(dataset_name_list: list, subdataset_name=None):
             "dataset_name and subdataset_name not correctly specified"
         default_task = eval_name_mapping[subdataset_name]
     else:
+        # TODO coverage
         assert isinstance(eval_name_mapping, list), "dataset_name and subdataset_name not correctly specified"
         default_task = eval_name_mapping
     return default_task

@@ -33,8 +33,10 @@ def bounded_gridunion(model_type=None,
         if "u" in custom_hpo_args["bound"][each_key]:
             upper = custom_hpo_args["bound"][each_key]["u"]
         else:
+            # TODO coverage
             upper = 100000
         if "l" in custom_hpo_args["bound"][each_key]:
+            # TODO coverage
             lower = custom_hpo_args["bound"][each_key]["l"]
         else:
             lower = -100000

@@ -42,6 +44,7 @@ def bounded_gridunion(model_type=None,
         upper_id = len(original_space)
         for x in range(len(original_space)):
             if original_space[x] > upper:
+                # TODO coverage
                 upper_id = x
                 break
         lower_id = 0

@@ -121,6 +124,7 @@ def hpo_space_generic_grid(model_type=None,
                            subdataset_name=None,
                            algo_mode=None,
                            **custom_hpo_args):
+    # TODO coverage
     output_config = {
         "learning_rate": [1e-5, 2e-5, 3e-5, 4e-5, 5e-5, 1e-4, 1.5e-4],
         "num_train_epochs": [3, 10],

@@ -137,6 +141,7 @@ def hpo_space_small(model_type=None,
                     subdataset_name=None,
                     algo_mode=None,
                     **custom_hpo_args):
+    # TODO coverage
     config_json = AutoGridSearchSpace.from_model_and_dataset_name(
         model_type, model_size_type, dataset_name_list, subdataset_name, "hpo")
     output_config = {}

@@ -70,9 +70,11 @@ class AutoSearchAlgorithm:

         assert hpo_search_space, "hpo_search_space needs to be specified for calling AutoSearchAlgorithm.from_method_name"
         if not search_algo_name:
+            # TODO coverage
             search_algo_name = "grid"
         if search_algo_name in SEARCH_ALGO_MAPPING.keys():
             if SEARCH_ALGO_MAPPING[search_algo_name] is None:
+                # TODO coverage
                 return None
             """
             filtering the customized args for hpo from custom_hpo_args, keep those

@@ -91,6 +93,7 @@ class AutoSearchAlgorithm:
                 : max(hpo_search_space["per_device_train_batch_size"].categories)},
             """
             if search_algo_args_mode == "dft":
+                # TODO coverage
                 this_search_algo_kwargs = DEFAULT_SEARCH_ALGO_ARGS_MAPPING[search_algo_name](
                     "dft",
                     metric_name,

@@ -121,6 +124,7 @@ class AutoSearchAlgorithm:

     @staticmethod
     def grid2list(grid_config):
+        # TODO coverage
         key_val_list = [[(key, each_val) for each_val in val_list['grid_search']]
                         for (key, val_list) in grid_config.items()]
         config_list = [dict(x) for x in itertools.product(*key_val_list)]

@@ -132,6 +136,7 @@ def get_search_algo_args_optuna(search_args_mode,
                                 metric_mode_name,
                                 hpo_search_space=None,
                                 **custom_hpo_args):
+    # TODO coverage
     return {}

@@ -145,6 +150,7 @@ def default_search_algo_args_bs(search_args_mode,
             isinstance(hpo_search_space["num_train_epochs"], ray.tune.sample.Categorical):
         min_epoch = min(hpo_search_space["num_train_epochs"].categories)
     else:
+        # TODO coverage
         assert isinstance(hpo_search_space["num_train_epochs"], ray.tune.sample.Float)
         min_epoch = hpo_search_space["num_train_epochs"].lower
     default_search_algo_args = {

@@ -166,6 +172,7 @@ def default_search_algo_args_grid_search(search_args_mode,
                                          metric_mode_name,
                                          hpo_search_space=None,
                                          **custom_hpo_args):
+    # TODO coverage
     return {}

@@ -174,6 +181,7 @@ def default_search_algo_args_random_search(search_args_mode,
                                            metric_mode_name,
                                            hpo_search_space=None,
                                            **custom_hpo_args):
+    # TODO coverage
     return {}

@@ -17,6 +17,7 @@ class TrainerForAutoTransformers(TFTrainer):
            eval_dataset:
                the dataset to be evaluated
        """
+        # TODO coverage
        from ray import tune

        eval_dataloader = self.get_eval_dataloader(eval_dataset)

@@ -38,6 +39,7 @@ class TrainerForAutoTransformers(TFTrainer):
         Overriding transformers.Trainer.save_state. It is only through saving
         the states can best_trial.get_best_checkpoint return a non-empty value.
         """
+        # TODO coverage
         import torch
         from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
         from ray import tune

@@ -80,6 +82,7 @@ class TrainerForAutoTransformers(TFTrainer):
                                          device_count=None):
         if max_steps:
             return int(warmup_ratio * max_steps)
+        # TODO coverage
         max_steps = TrainerForAutoTransformers.convert_num_train_epochs_to_max_steps(
             num_train_epochs,
             num_train_examples,

@@ -360,6 +360,7 @@ class JobID:
                 print("console_args has no attribute {}, continue".format(each_key))
                 continue
         if self.mod == "grid":
+            # TODO coverage
             self.alg = "grid"

@@ -447,6 +448,7 @@ class AzureUtils:
         if autohf is not None:
             self.jobid = autohf.jobid_config
         else:
+            # TODO coverage
             assert jobid_config is not None, "jobid_config must be passed either through autohf.jobid_config" \
                                              " or jobid_config"
             self.jobid = jobid_config

@@ -527,6 +529,7 @@ class AzureUtils:
             print("Your output will not be synced to azure because azure-blob-storage is not installed")

     def download_azure_blob(self, blobname):
+        # TODO coverage
         blob_client = self._init_blob_client(blobname)
         if blob_client:
             pathlib.Path(re.search("(?P<parent_path>^.*)/[^/]+$", blobname).group("parent_path")).mkdir(

@@ -549,6 +552,7 @@ class AzureUtils:
                 metric_score = each_trial.metric_analysis["eval_" + analysis.default_metric]
                 time_stamp = each_trial.metric_analysis['timestamp']
             except KeyError:
+                # TODO coverage
                 print("KeyError, {} does not contain the key {} or {}".format("each_trial.metric_analysis",
                                                                               "eval_" + analysis.default_metric,
                                                                               "timestamp"))

@@ -608,6 +612,7 @@ class AzureUtils:
         """
         azure_save_file_name = local_json_file.split("/")[-1][:-5]
         if self.data_root_dir is None:
+            # TODO coverage
             from ..utils import load_dft_args
             console_args = load_dft_args()
             output_dir = getattr(console_args, "data_root_dir")

@@ -622,6 +627,7 @@ class AzureUtils:

     @staticmethod
     def is_after_earliest_time(this_blob, earliest_time: Tuple[int, int, int]):
+        # TODO coverage
         import pytz
         utc = pytz.UTC
         if this_blob.last_modified >= utc.localize(datetime(earliest_time[0], earliest_time[1], earliest_time[2])):

@@ -639,6 +645,7 @@ class AzureUtils:
         container_client = self._init_azure_clients()
         if container_client:
             for each_blob in container_client.list_blobs():
+                # TODO coverage
                 if each_blob.name.startswith(root_log_path):
                     each_jobconfig = JobID.convert_blobname_to_jobid(each_blob.name)
                     is_append = False

@@ -701,6 +708,7 @@ class AzureUtils:
         """
         matched_config_score_lists = []
         for (each_jobconfig, each_blob) in matched_blob_list:
+            # TODO coverage
             self.download_azure_blob(each_blob.name)
             data_json = json.load(open(each_blob.name, "r"))
             each_config_and_score_list = ConfigScoreList(

@@ -35,6 +35,7 @@ class WandbUtils:
             os.environ["WANDB_API_KEY"] = wandb_key
             os.environ["WANDB_MODE"] = "online"
         else:
+            # TODO coverage
             os.environ["WANDB_MODE"] = "disabled"
         self.jobid_config = jobid_config

@@ -53,6 +54,7 @@ class WandbUtils:
         return ""

     def set_wandb_per_trial(self):
+        # TODO coverage
         print("before wandb.init\n\n\n")
         try:
             import wandb

@@ -1 +1 @@
-__version__ = "0.6.1"
+__version__ = "0.6.2"