# autogen/flaml/automl/spark/configs.py

# Parameter names common to every LightGBM parameter list in this module
# (the classifier, regressor and ranker lists each extend this one).
# Keep exactly one name per line, each followed by a comma: adjacent string
# literals without a comma are silently concatenated by Python, which would
# merge two names into a single bogus entry.
ParamList_LightGBM_Base = [
    "baggingFraction",
    "baggingFreq",
    "baggingSeed",
    "binSampleCount",
    "boostFromAverage",
    "boostingType",
    "catSmooth",
    "categoricalSlotIndexes",
    "categoricalSlotNames",
    "catl2",
    "chunkSize",
    "dataRandomSeed",
    "defaultListenPort",
    "deterministic",
    "driverListenPort",
    "dropRate",
    "dropSeed",
    "earlyStoppingRound",
    "executionMode",
    "extraSeed",
    "featureFraction",
    "featureFractionByNode",
    "featureFractionSeed",
    "featuresCol",
    "featuresShapCol",
    "fobj",
    "improvementTolerance",
    "initScoreCol",
    "isEnableSparse",
    "isProvideTrainingMetric",
    "labelCol",
    "lambdaL1",
    "lambdaL2",
    "leafPredictionCol",
    "learningRate",
    "matrixType",
    "maxBin",
    "maxBinByFeature",
    "maxCatThreshold",
    "maxCatToOnehot",
    "maxDeltaStep",
    "maxDepth",
    "maxDrop",
    "metric",
    "microBatchSize",
    "minDataInLeaf",
    "minDataPerBin",
    "minDataPerGroup",
    "minGainToSplit",
    "minSumHessianInLeaf",
    "modelString",
    "monotoneConstraints",
    "monotoneConstraintsMethod",
    "monotonePenalty",
    "negBaggingFraction",
    "numBatches",
    "numIterations",
    "numLeaves",
    "numTasks",
    "numThreads",
    "objectiveSeed",
    "otherRate",
    "parallelism",
    "passThroughArgs",
    "posBaggingFraction",
    "predictDisableShapeCheck",
    "predictionCol",
    "repartitionByGroupingColumn",
    "seed",
    "skipDrop",
    "slotNames",
    "timeout",
    "topK",
    "topRate",
    "uniformDrop",
    "useBarrierExecutionMode",
    "useMissing",
    "useSingleDatasetMode",
    "validationIndicatorCol",
    "verbosity",
    "weightCol",
    "xGBoostDartMode",
    "zeroAsMissing",
    "objective",
]
# Classifier parameter names: every base name, followed by the four
# classifier-only names appended at the end.
ParamList_LightGBM_Classifier = [
    *ParamList_LightGBM_Base,
    "isUnbalance",
    "probabilityCol",
    "rawPredictionCol",
    "thresholds",
]
# Regressor parameter names: every base name plus one regressor-only name.
ParamList_LightGBM_Regressor = [
    *ParamList_LightGBM_Base,
    "tweedieVariancePower",
]
# Ranker parameter names: every base name, followed by the four
# ranker-only names appended at the end.
ParamList_LightGBM_Ranker = [
    *ParamList_LightGBM_Base,
    "groupCol",
    "evalAt",
    "labelGain",
    "maxPosition",
]
Support spark dataframe as input dataset and spark models as estimators (#934) * add basic support to Spark dataframe add support to SynapseML LightGBM model update to pyspark>=3.2.0 to leverage pandas_on_Spark API * clean code, add TODOs * add sample_train_data for pyspark.pandas dataframe, fix bugs * improve some functions, fix bugs * fix dict change size during iteration * update model predict * update LightGBM model, update test * update SynapseML LightGBM params * update synapseML and tests * update TODOs * Added support to roc_auc for spark models * Added support to score of spark estimator * Added test for automl score of spark estimator * Added cv support to pyspark.pandas dataframe * Update test, fix bugs * Added tests * Updated docs, tests, added a notebook * Fix bugs in non-spark env * Fix bugs and improve tests * Fix uninstall pyspark * Fix tests error * Fix java.lang.OutOfMemoryError: Java heap space * Fix test_performance * Update test_sparkml to test_0sparkml to use the expected spark conf * Remove unnecessary widgets in notebook * Fix iloc java.lang.StackOverflowError * fix pre-commit * Added params check for spark dataframes * Refactor code for train_test_split to a function * Update train_test_split_pyspark * Refactor if-else, remove unnecessary code * Remove y from predict, remove mem control from n_iter compute * Update workflow * Improve _split_pyspark * Fix test failure of too short training time * Fix typos, improve docstrings * Fix index errors of pandas_on_spark, add spark loss metric * Fix typo of ndcgAtK * Update NDCG metrics and tests * Remove useless logger * Use cache and count to ensure consistent indexes * refactor for merge main * fix errors of refactor * Updated SparkLightGBMEstimator and cache * Updated config2params * Remove unused import * Fix unknown parameters * Update default_estimator_list * Add unit tests for spark metrics 2023-03-26 03:59:46 +08:00			`ParamList_LightGBM_Base = [`
			`"baggingFraction",`
			`"baggingFreq",`
			`"baggingSeed",`
			`"binSampleCount",`
			`"boostFromAverage",`
			`"boostingType",`
			`"catSmooth",`
			`"categoricalSlotIndexes",`
			`"categoricalSlotNames",`
			`"catl2",`
			`"chunkSize",`
			`"dataRandomSeed",`
			`"defaultListenPort",`
			`"deterministic",`
			`"driverListenPort",`
			`"dropRate",`
			`"dropSeed",`
			`"earlyStoppingRound",`
			`"executionMode",`
			`"extraSeed" "featureFraction",`
			`"featureFractionByNode",`
			`"featureFractionSeed",`
			`"featuresCol",`
			`"featuresShapCol",`
			`"fobj" "improvementTolerance",`
			`"initScoreCol",`
			`"isEnableSparse",`
			`"isProvideTrainingMetric",`
			`"labelCol",`
			`"lambdaL1",`
			`"lambdaL2",`
			`"leafPredictionCol",`
			`"learningRate",`
			`"matrixType",`
			`"maxBin",`
			`"maxBinByFeature",`
			`"maxCatThreshold",`
			`"maxCatToOnehot",`
			`"maxDeltaStep",`
			`"maxDepth",`
			`"maxDrop",`
			`"metric",`
			`"microBatchSize",`
			`"minDataInLeaf",`
			`"minDataPerBin",`
			`"minDataPerGroup",`
			`"minGainToSplit",`
			`"minSumHessianInLeaf",`
			`"modelString",`
			`"monotoneConstraints",`
			`"monotoneConstraintsMethod",`
			`"monotonePenalty",`
			`"negBaggingFraction",`
			`"numBatches",`
			`"numIterations",`
			`"numLeaves",`
			`"numTasks",`
			`"numThreads",`
			`"objectiveSeed",`
			`"otherRate",`
			`"parallelism",`
			`"passThroughArgs",`
			`"posBaggingFraction",`
			`"predictDisableShapeCheck",`
			`"predictionCol",`
			`"repartitionByGroupingColumn",`
			`"seed",`
			`"skipDrop",`
			`"slotNames",`
			`"timeout",`
			`"topK",`
			`"topRate",`
			`"uniformDrop",`
			`"useBarrierExecutionMode",`
			`"useMissing",`
			`"useSingleDatasetMode",`
			`"validationIndicatorCol",`
			`"verbosity",`
			`"weightCol",`
			`"xGBoostDartMode",`
			`"zeroAsMissing",`
			`"objective",`
			`]`
			`ParamList_LightGBM_Classifier = ParamList_LightGBM_Base + [`
			`"isUnbalance",`
			`"probabilityCol",`
			`"rawPredictionCol",`
			`"thresholds",`
			`]`
			`ParamList_LightGBM_Regressor = ParamList_LightGBM_Base + ["tweedieVariancePower"]`
			`ParamList_LightGBM_Ranker = ParamList_LightGBM_Base + [`
			`"groupCol",`
			`"evalAt",`
			`"labelGain",`
			`"maxPosition",`
			`]`