mirror of
https://github.com/microsoft/autogen.git
synced 2025-09-12 17:56:20 +00:00

* add pipeline tuner component and dependencies. * clean code. * do not need force rerun. * replace the resources. * update metrics retrieving. * Update test/pipeline_tuning_example/requirements.txt * Update test/pipeline_tuning_example/train/env.yaml * Update test/pipeline_tuning_example/tuner/env.yaml * Update test/pipeline_tuning_example/tuner/tuner_func.py * Update test/pipeline_tuning_example/data_prep/env.yaml * fix issues found by lint with flake8. * add documentation * add data. * do not need AML resource for local run. * AML -> AzureML * clean code. * Update website/docs/Examples/Tune-AzureML pipeline.md * rename and add pip install. * update figure name. * align docs with code. * remove extra line.
62 lines
2.0 KiB
Python
62 lines
2.0 KiB
Python
import argparse
|
|
import lightgbm as lgb
|
|
import os
|
|
import pandas as pd
|
|
from azureml.core import Run
|
|
|
|
|
|
class LightGBMCallbackHandler():
|
|
def __init__(self):
|
|
pass
|
|
|
|
def callback(self, env: lgb.callback.CallbackEnv) -> None:
|
|
"""Callback method to collect metrics produced by LightGBM.
|
|
|
|
See https://lightgbm.readthedocs.io/en/latest/_modules/lightgbm/callback.html
|
|
"""
|
|
# loop on all the evaluation results tuples
|
|
print("env.evaluation_result_list:", env.evaluation_result_list)
|
|
for data_name, eval_name, result, _ in env.evaluation_result_list:
|
|
run = Run.get_context()
|
|
run.log(f"{data_name}_{eval_name}", result)
|
|
|
|
|
|
def main(args):
|
|
"""Main function of the script."""
|
|
|
|
train_path = os.path.join(args.train_data, 'data.csv')
|
|
print("traning_path:", train_path)
|
|
|
|
test_path = os.path.join(args.test_data, 'data.csv')
|
|
|
|
train_set = lgb.Dataset(train_path)
|
|
test_set = lgb.Dataset(test_path)
|
|
callbacks_handler = LightGBMCallbackHandler()
|
|
config = {"header": True, "objective": "binary", "label_column": 30, "metric": "binary_error",
|
|
"n_estimators": args.n_estimators, "learning_rate": args.learning_rate}
|
|
gbm = lgb.train(
|
|
config,
|
|
train_set,
|
|
valid_sets=[test_set],
|
|
valid_names=["eval"],
|
|
callbacks=[
|
|
callbacks_handler.callback,
|
|
],
|
|
)
|
|
|
|
print('Saving model...')
|
|
# save model to file
|
|
gbm.save_model(os.path.join(args.model, 'model.txt'))
|
|
|
|
|
|
if __name__ == "__main__":
|
|
# input and output arguments
|
|
parser = argparse.ArgumentParser()
|
|
parser.add_argument("--train_data", type=str, help="path to train data")
|
|
parser.add_argument("--test_data", type=str, help="path to test data")
|
|
parser.add_argument("--n_estimators", required=False, default=100, type=int)
|
|
parser.add_argument("--learning_rate", required=False, default=0.1, type=float)
|
|
parser.add_argument("--model", type=str, help="path to output directory")
|
|
args = parser.parse_args()
|
|
main(args)
|