mirror of
https://github.com/microsoft/autogen.git
synced 2025-12-14 08:37:54 +00:00
Fix datetime column preprocessing for validation data. (#73)
* Fix datetime column preprocessing for validation data.
* Format code lines.
This commit is contained in:
parent
f4f3f4f17b
commit
ad42889a3b
@ -192,12 +192,13 @@ class DataTransformer:
|
|||||||
if isinstance(X, pd.DataFrame):
|
if isinstance(X, pd.DataFrame):
|
||||||
X = X.copy()
|
X = X.copy()
|
||||||
n = X.shape[0]
|
n = X.shape[0]
|
||||||
cat_columns, num_columns = [], []
|
cat_columns, num_columns, datetime_columns = [], [], []
|
||||||
drop = False
|
drop = False
|
||||||
for column in X.columns:
|
for column in X.columns:
|
||||||
# sklearn\utils\validation.py needs int/float values
|
# sklearn\utils\validation.py needs int/float values
|
||||||
if X[column].dtype.name == 'datetime64[ns]':
|
if X[column].dtype.name == 'datetime64[ns]':
|
||||||
X[column] = X[column].map(datetime.toordinal)
|
X[column] = X[column].map(datetime.toordinal)
|
||||||
|
datetime_columns.append(column)
|
||||||
if X[column].dtype.name in ('object', 'category'):
|
if X[column].dtype.name in ('object', 'category'):
|
||||||
if X[column].nunique() == 1 or X[column].nunique(
|
if X[column].nunique() == 1 or X[column].nunique(
|
||||||
dropna=True) == n - X[column].isnull().sum():
|
dropna=True) == n - X[column].isnull().sum():
|
||||||
@ -236,7 +237,8 @@ class DataTransformer:
|
|||||||
SimpleImputer(missing_values=np.nan, strategy='median'),
|
SimpleImputer(missing_values=np.nan, strategy='median'),
|
||||||
X_num.columns)])
|
X_num.columns)])
|
||||||
X[num_columns] = self.transformer.fit_transform(X_num)
|
X[num_columns] = self.transformer.fit_transform(X_num)
|
||||||
self._cat_columns, self._num_columns = cat_columns, num_columns
|
self._cat_columns, self._num_columns, self._datetime_columns = cat_columns, \
|
||||||
|
num_columns, datetime_columns
|
||||||
self._drop = drop
|
self._drop = drop
|
||||||
|
|
||||||
if task == 'regression':
|
if task == 'regression':
|
||||||
@ -249,7 +251,11 @@ class DataTransformer:
|
|||||||
|
|
||||||
def transform(self, X):
|
def transform(self, X):
|
||||||
if isinstance(X, pd.DataFrame):
|
if isinstance(X, pd.DataFrame):
|
||||||
cat_columns, num_columns = self._cat_columns, self._num_columns
|
cat_columns, num_columns, datetime_columns = self._cat_columns, \
|
||||||
|
self._num_columns, self._datetime_columns
|
||||||
|
if datetime_columns:
|
||||||
|
for dt_column in datetime_columns:
|
||||||
|
X[dt_column] = X[dt_column].map(datetime.toordinal)
|
||||||
X = X[cat_columns + num_columns].copy()
|
X = X[cat_columns + num_columns].copy()
|
||||||
for column in cat_columns:
|
for column in cat_columns:
|
||||||
# print(column, X[column].dtype.name)
|
# print(column, X[column].dtype.name)
|
||||||
|
|||||||
@ -4,6 +4,9 @@ import numpy as np
|
|||||||
import scipy.sparse
|
import scipy.sparse
|
||||||
from sklearn.datasets import load_boston, load_iris, load_wine
|
from sklearn.datasets import load_boston, load_iris, load_wine
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
from flaml import AutoML
|
from flaml import AutoML
|
||||||
from flaml.data import get_output_from_log
|
from flaml.data import get_output_from_log
|
||||||
|
|
||||||
@ -219,6 +222,23 @@ class TestAutoML(unittest.TestCase):
|
|||||||
print(automl_experiment.model)
|
print(automl_experiment.model)
|
||||||
print(automl_experiment.predict_proba(X_train)[:5])
|
print(automl_experiment.predict_proba(X_train)[:5])
|
||||||
|
|
||||||
|
def test_datetime_columns(self):
|
||||||
|
|
||||||
|
automl_experiment = AutoML()
|
||||||
|
automl_settings = {
|
||||||
|
"time_budget": 2,
|
||||||
|
"metric": 'mse',
|
||||||
|
"task": 'regression',
|
||||||
|
"log_file_name": "test/datetime_columns.log",
|
||||||
|
"log_training_metric": True,
|
||||||
|
"n_jobs": 1,
|
||||||
|
"model_history": True
|
||||||
|
}
|
||||||
|
|
||||||
|
fake_df = pd.DataFrame({'A': [datetime(1900, 2, 3), datetime(1900, 3, 4)]})
|
||||||
|
y = np.array([0, 1])
|
||||||
|
automl_experiment.fit(X_train=fake_df, X_val=fake_df, y_train=y, y_val=y, **automl_settings)
|
||||||
|
|
||||||
def test_regression(self):
|
def test_regression(self):
|
||||||
|
|
||||||
automl_experiment = AutoML()
|
automl_experiment = AutoML()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user