mirror of
https://github.com/microsoft/autogen.git
synced 2025-07-23 08:52:56 +00:00
Sklearn api x (#405)
* changed signature of automl.predict and automl.predict_proba to X
* XGBoostEstimator
* changed signature of Prophet predict to X
* changed signature of ARIMA predict to X
* changed signature of TS_SKLearn_Regressor predict to X
This commit is contained in:
parent
a6d70efff7
commit
1c911da9f8
@ -714,13 +714,11 @@ class AutoML(BaseEstimator):
|
|||||||
"""Time taken to find best model in seconds."""
|
"""Time taken to find best model in seconds."""
|
||||||
return self.__dict__.get("_time_taken_best_iter")
|
return self.__dict__.get("_time_taken_best_iter")
|
||||||
|
|
||||||
def predict(
|
def predict(self, X: Union[np.array, pd.DataFrame, List[str], List[List[str]]]):
|
||||||
self, X_test: Union[np.array, pd.DataFrame, List[str], List[List[str]]]
|
|
||||||
):
|
|
||||||
"""Predict label from features.
|
"""Predict label from features.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
X_test: A numpy array of featurized instances, shape n * m,
|
X: A numpy array of featurized instances, shape n * m,
|
||||||
or for 'ts_forecast' task:
|
or for 'ts_forecast' task:
|
||||||
a pandas dataframe with the first column containing
|
a pandas dataframe with the first column containing
|
||||||
timestamp values (datetime type) or an integer n for
|
timestamp values (datetime type) or an integer n for
|
||||||
@ -748,8 +746,8 @@ class AutoML(BaseEstimator):
|
|||||||
"No estimator is trained. Please run fit with enough budget."
|
"No estimator is trained. Please run fit with enough budget."
|
||||||
)
|
)
|
||||||
return None
|
return None
|
||||||
X_test = self._preprocess(X_test)
|
X = self._preprocess(X)
|
||||||
y_pred = estimator.predict(X_test)
|
y_pred = estimator.predict(X)
|
||||||
if (
|
if (
|
||||||
isinstance(y_pred, np.ndarray)
|
isinstance(y_pred, np.ndarray)
|
||||||
and y_pred.ndim > 1
|
and y_pred.ndim > 1
|
||||||
@ -763,12 +761,12 @@ class AutoML(BaseEstimator):
|
|||||||
else:
|
else:
|
||||||
return y_pred
|
return y_pred
|
||||||
|
|
||||||
def predict_proba(self, X_test):
|
def predict_proba(self, X):
|
||||||
"""Predict the probability of each class from features, only works for
|
"""Predict the probability of each class from features, only works for
|
||||||
classification problems.
|
classification problems.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
X_test: A numpy array of featurized instances, shape n * m.
|
X: A numpy array of featurized instances, shape n * m.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A numpy array of shape n * c. c is the # classes. Each element at
|
A numpy array of shape n * c. c is the # classes. Each element at
|
||||||
@ -780,8 +778,8 @@ class AutoML(BaseEstimator):
|
|||||||
"No estimator is trained. Please run fit with enough budget."
|
"No estimator is trained. Please run fit with enough budget."
|
||||||
)
|
)
|
||||||
return None
|
return None
|
||||||
X_test = self._preprocess(X_test)
|
X = self._preprocess(X)
|
||||||
proba = self._trained_estimator.predict_proba(X_test)
|
proba = self._trained_estimator.predict_proba(X)
|
||||||
return proba
|
return proba
|
||||||
|
|
||||||
def _preprocess(self, X):
|
def _preprocess(self, X):
|
||||||
|
@ -197,32 +197,32 @@ class BaseEstimator:
|
|||||||
train_time = self._fit(X_train, y_train, **kwargs)
|
train_time = self._fit(X_train, y_train, **kwargs)
|
||||||
return train_time
|
return train_time
|
||||||
|
|
||||||
def predict(self, X_test):
|
def predict(self, X):
|
||||||
"""Predict label from features.
|
"""Predict label from features.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
X_test: A numpy array or a dataframe of featurized instances, shape n*m.
|
X: A numpy array or a dataframe of featurized instances, shape n*m.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A numpy array of shape n*1.
|
A numpy array of shape n*1.
|
||||||
Each element is the label for a instance.
|
Each element is the label for a instance.
|
||||||
"""
|
"""
|
||||||
if self._model is not None:
|
if self._model is not None:
|
||||||
X_test = self._preprocess(X_test)
|
X = self._preprocess(X)
|
||||||
return self._model.predict(X_test)
|
return self._model.predict(X)
|
||||||
else:
|
else:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Estimator is not fit yet. Please run fit() before predict()."
|
"Estimator is not fit yet. Please run fit() before predict()."
|
||||||
)
|
)
|
||||||
return np.ones(X_test.shape[0])
|
return np.ones(X.shape[0])
|
||||||
|
|
||||||
def predict_proba(self, X_test):
|
def predict_proba(self, X):
|
||||||
"""Predict the probability of each class from features.
|
"""Predict the probability of each class from features.
|
||||||
|
|
||||||
Only works for classification problems
|
Only works for classification problems
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
X_test: A numpy array of featurized instances, shape n*m.
|
X: A numpy array of featurized instances, shape n*m.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A numpy array of shape n*c. c is the # classes.
|
A numpy array of shape n*c. c is the # classes.
|
||||||
@ -231,8 +231,8 @@ class BaseEstimator:
|
|||||||
"""
|
"""
|
||||||
assert self._task in CLASSIFICATION, "predict_proba() only for classification."
|
assert self._task in CLASSIFICATION, "predict_proba() only for classification."
|
||||||
|
|
||||||
X_test = self._preprocess(X_test)
|
X = self._preprocess(X)
|
||||||
return self._model.predict_proba(X_test)
|
return self._model.predict_proba(X)
|
||||||
|
|
||||||
def cleanup(self):
|
def cleanup(self):
|
||||||
del self._model
|
del self._model
|
||||||
@ -708,18 +708,18 @@ class TransformersEstimator(BaseEstimator):
|
|||||||
)
|
)
|
||||||
return test_dataset, training_args
|
return test_dataset, training_args
|
||||||
|
|
||||||
def predict_proba(self, X_test):
|
def predict_proba(self, X):
|
||||||
assert (
|
assert (
|
||||||
self._task in CLASSIFICATION
|
self._task in CLASSIFICATION
|
||||||
), "predict_proba() only for classification tasks."
|
), "predict_proba() only for classification tasks."
|
||||||
|
|
||||||
test_dataset, _ = self._init_model_for_predict(X_test)
|
test_dataset, _ = self._init_model_for_predict(X)
|
||||||
predictions = self._trainer.predict(test_dataset)
|
predictions = self._trainer.predict(test_dataset)
|
||||||
self._trainer = None
|
self._trainer = None
|
||||||
return predictions.predictions
|
return predictions.predictions
|
||||||
|
|
||||||
def predict(self, X_test):
|
def predict(self, X):
|
||||||
test_dataset, training_args = self._init_model_for_predict(X_test)
|
test_dataset, training_args = self._init_model_for_predict(X)
|
||||||
if self._task not in NLG_TASKS:
|
if self._task not in NLG_TASKS:
|
||||||
predictions = self._trainer.predict(test_dataset)
|
predictions = self._trainer.predict(test_dataset)
|
||||||
else:
|
else:
|
||||||
@ -1108,12 +1108,12 @@ class XGBoostEstimator(SKLearnEstimator):
|
|||||||
train_time = time.time() - start_time
|
train_time = time.time() - start_time
|
||||||
return train_time
|
return train_time
|
||||||
|
|
||||||
def predict(self, X_test):
|
def predict(self, X):
|
||||||
import xgboost as xgb
|
import xgboost as xgb
|
||||||
|
|
||||||
if not issparse(X_test):
|
if not issparse(X):
|
||||||
X_test = self._preprocess(X_test)
|
X = self._preprocess(X)
|
||||||
dtest = xgb.DMatrix(X_test)
|
dtest = xgb.DMatrix(X)
|
||||||
return super().predict(dtest)
|
return super().predict(dtest)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -1598,22 +1598,22 @@ class Prophet(SKLearnEstimator):
|
|||||||
self._model = model
|
self._model = model
|
||||||
return train_time
|
return train_time
|
||||||
|
|
||||||
def predict(self, X_test):
|
def predict(self, X):
|
||||||
if isinstance(X_test, int):
|
if isinstance(X, int):
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"predict() with steps is only supported for arima/sarimax."
|
"predict() with steps is only supported for arima/sarimax."
|
||||||
" For Prophet, pass a dataframe with the first column containing"
|
" For Prophet, pass a dataframe with the first column containing"
|
||||||
" the timestamp values."
|
" the timestamp values."
|
||||||
)
|
)
|
||||||
if self._model is not None:
|
if self._model is not None:
|
||||||
X_test = self._preprocess(X_test)
|
X = self._preprocess(X)
|
||||||
forecast = self._model.predict(X_test)
|
forecast = self._model.predict(X)
|
||||||
return forecast["yhat"]
|
return forecast["yhat"]
|
||||||
else:
|
else:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Estimator is not fit yet. Please run fit() before predict()."
|
"Estimator is not fit yet. Please run fit() before predict()."
|
||||||
)
|
)
|
||||||
return np.ones(X_test.shape[0])
|
return np.ones(X.shape[0])
|
||||||
|
|
||||||
|
|
||||||
class ARIMA(Prophet):
|
class ARIMA(Prophet):
|
||||||
@ -1678,30 +1678,30 @@ class ARIMA(Prophet):
|
|||||||
self._model = model
|
self._model = model
|
||||||
return train_time
|
return train_time
|
||||||
|
|
||||||
def predict(self, X_test):
|
def predict(self, X):
|
||||||
if self._model is not None:
|
if self._model is not None:
|
||||||
if isinstance(X_test, int):
|
if isinstance(X, int):
|
||||||
forecast = self._model.forecast(steps=X_test)
|
forecast = self._model.forecast(steps=X)
|
||||||
elif isinstance(X_test, DataFrame):
|
elif isinstance(X, DataFrame):
|
||||||
start = X_test[TS_TIMESTAMP_COL].iloc[0]
|
start = X[TS_TIMESTAMP_COL].iloc[0]
|
||||||
end = X_test[TS_TIMESTAMP_COL].iloc[-1]
|
end = X[TS_TIMESTAMP_COL].iloc[-1]
|
||||||
if len(X_test.columns) > 1:
|
if len(X.columns) > 1:
|
||||||
X_test = self._preprocess(X_test.drop(columns=TS_TIMESTAMP_COL))
|
X = self._preprocess(X.drop(columns=TS_TIMESTAMP_COL))
|
||||||
regressors = list(X_test)
|
regressors = list(X)
|
||||||
print(start, end, X_test.shape)
|
print(start, end, X.shape)
|
||||||
forecast = self._model.predict(
|
forecast = self._model.predict(
|
||||||
start=start, end=end, exog=X_test[regressors]
|
start=start, end=end, exog=X[regressors]
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
forecast = self._model.predict(start=start, end=end)
|
forecast = self._model.predict(start=start, end=end)
|
||||||
else:
|
else:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"X_test needs to be either a pandas Dataframe with dates as the first column"
|
"X needs to be either a pandas Dataframe with dates as the first column"
|
||||||
" or an int number of periods for predict()."
|
" or an int number of periods for predict()."
|
||||||
)
|
)
|
||||||
return forecast
|
return forecast
|
||||||
else:
|
else:
|
||||||
return np.ones(X_test if isinstance(X_test, int) else X_test.shape[0])
|
return np.ones(X if isinstance(X, int) else X.shape[0])
|
||||||
|
|
||||||
|
|
||||||
class SARIMAX(ARIMA):
|
class SARIMAX(ARIMA):
|
||||||
@ -1873,42 +1873,40 @@ class TS_SKLearn_Regressor(SKLearnEstimator):
|
|||||||
train_time = time.time() - current_time
|
train_time = time.time() - current_time
|
||||||
return train_time
|
return train_time
|
||||||
|
|
||||||
def predict(self, X_test):
|
def predict(self, X):
|
||||||
if self._model is not None:
|
if self._model is not None:
|
||||||
X_test = self.transform_X(X_test)
|
X = self.transform_X(X)
|
||||||
X_test = self._preprocess(X_test)
|
X = self._preprocess(X)
|
||||||
if isinstance(self._model, list):
|
if isinstance(self._model, list):
|
||||||
assert len(self._model) == len(
|
assert len(self._model) == len(
|
||||||
X_test
|
X
|
||||||
), "Model is optimized for horizon, length of X_test must be equal to `period`."
|
), "Model is optimized for horizon, length of X must be equal to `period`."
|
||||||
preds = []
|
preds = []
|
||||||
for i in range(1, len(self._model) + 1):
|
for i in range(1, len(self._model) + 1):
|
||||||
(
|
(
|
||||||
X_pred,
|
X_pred,
|
||||||
_,
|
_,
|
||||||
) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(
|
) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(
|
||||||
X_test.iloc[:i, :]
|
X.iloc[:i, :]
|
||||||
)
|
)
|
||||||
preds.append(self._model[i - 1].predict(X_pred)[-1])
|
preds.append(self._model[i - 1].predict(X_pred)[-1])
|
||||||
forecast = DataFrame(
|
forecast = DataFrame(
|
||||||
data=np.asarray(preds).reshape(-1, 1),
|
data=np.asarray(preds).reshape(-1, 1),
|
||||||
columns=[self.hcrystaball_model.name],
|
columns=[self.hcrystaball_model.name],
|
||||||
index=X_test.index,
|
index=X.index,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
(
|
(
|
||||||
X_pred,
|
X_pred,
|
||||||
_,
|
_,
|
||||||
) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(
|
) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(X)
|
||||||
X_test
|
|
||||||
)
|
|
||||||
forecast = self._model.predict(X_pred)
|
forecast = self._model.predict(X_pred)
|
||||||
return forecast
|
return forecast
|
||||||
else:
|
else:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Estimator is not fit yet. Please run fit() before predict()."
|
"Estimator is not fit yet. Please run fit() before predict()."
|
||||||
)
|
)
|
||||||
return np.ones(X_test.shape[0])
|
return np.ones(X.shape[0])
|
||||||
|
|
||||||
|
|
||||||
class LGBM_TS_Regressor(TS_SKLearn_Regressor):
|
class LGBM_TS_Regressor(TS_SKLearn_Regressor):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user