# autogen/test/automl/test_utils.py

import numpy as np
from flaml.automl.utils import len_labels, unique_value_first_index


def test_len_labels():
    """Check ``len_labels`` on label lists with and without repeated values.

    Both inputs are made of the labels 1, 2 and 3, so the plain call is
    expected to report 3 for each of them. When ``True`` is passed as the
    second positional argument, the element at index 1 of the result is
    expected to equal ``[1, 2, 3]``.
    """
    expected_labels = [1, 2, 3]

    # Plain call: the result is compared directly against the label count.
    for labels in ([1, 2, 3], [1, 2, 3, 1, 2, 3]):
        assert len_labels(labels) == 3

    # Call with the extra flag: index 1 of the result holds the labels.
    for labels in ([1, 2, 3], [1, 2, 3, 1, 2, 3]):
        outcome = len_labels(labels, True)
        assert np.array_equal(outcome[1], expected_labels)


def test_unique_value_first_index():
    """Check ``unique_value_first_index`` on a list with one repeated value.

    For ``[1, 2, 2, 3]`` the first returned item is expected to equal
    ``[1, 2, 3]`` and the second ``[0, 1, 3]``, i.e. the position at which
    each of those values first occurs in the input.
    """
    uniques, positions = unique_value_first_index([1, 2, 2, 3])

    # Pair every returned array with the value it is expected to hold.
    comparisons = (
        (uniques, np.array([1, 2, 3])),
        (positions, np.array([0, 1, 3])),
    )
    for actual, wanted in comparisons:
        assert np.array_equal(actual, wanted)


if __name__ == "__main__":
    # Running the file directly executes every test in definition order,
    # without needing a test runner.
    for _run_test in (test_len_labels, test_unique_value_first_index):
        _run_test()
Support spark dataframe as input dataset and spark models as estimators (#934)

* add basic support to Spark dataframe; add support to SynapseML LightGBM model; update to pyspark>=3.2.0 to leverage pandas_on_Spark API
* clean code, add TODOs
* add sample_train_data for pyspark.pandas dataframe, fix bugs
* improve some functions, fix bugs
* fix dict change size during iteration
* update model predict
* update LightGBM model, update test
* update SynapseML LightGBM params
* update synapseML and tests
* update TODOs
* Added support to roc_auc for spark models
* Added support to score of spark estimator
* Added test for automl score of spark estimator
* Added cv support to pyspark.pandas dataframe
* Update test, fix bugs
* Added tests
* Updated docs, tests, added a notebook
* Fix bugs in non-spark env
* Fix bugs and improve tests
* Fix uninstall pyspark
* Fix tests error
* Fix java.lang.OutOfMemoryError: Java heap space
* Fix test_performance
* Update test_sparkml to test_0sparkml to use the expected spark conf
* Remove unnecessary widgets in notebook
* Fix iloc java.lang.StackOverflowError
* fix pre-commit
* Added params check for spark dataframes
* Refactor code for train_test_split to a function
* Update train_test_split_pyspark
* Refactor if-else, remove unnecessary code
* Remove y from predict, remove mem control from n_iter compute
* Update workflow
* Improve _split_pyspark
* Fix test failure of too short training time
* Fix typos, improve docstrings
* Fix index errors of pandas_on_spark, add spark loss metric
* Fix typo of ndcgAtK
* Update NDCG metrics and tests
* Remove unuseful logger
* Use cache and count to ensure consistent indexes
* refactor for merge main
* fix errors of refactor
* Updated SparkLightGBMEstimator and cache
* Updated config2params
* Remove unused import
* Fix unknown parameters
* Update default_estimator_list
* Add unit tests for spark metrics

2023-03-26 03:59:46 +08:00			`import numpy as np`
			`from flaml.automl.utils import len_labels, unique_value_first_index`


			`def test_len_labels():`
			`assert len_labels([1, 2, 3]) == 3`
			`assert len_labels([1, 2, 3, 1, 2, 3]) == 3`
			`assert np.array_equal(len_labels([1, 2, 3], True)[1], [1, 2, 3])`
			`assert np.array_equal(len_labels([1, 2, 3, 1, 2, 3], True)[1], [1, 2, 3])`


			`def test_unique_value_first_index():`
			`label_set, first_index = unique_value_first_index([1, 2, 2, 3])`
			`assert np.array_equal(label_set, np.array([1, 2, 3]))`
			`assert np.array_equal(first_index, np.array([0, 1, 3]))`


			`if __name__ == "__main__":`
			`test_len_labels()`
			`test_unique_value_first_index()`