mirror of
https://github.com/microsoft/autogen.git
synced 2025-09-11 01:06:08 +00:00

* add basic support to Spark dataframe add support to SynapseML LightGBM model update to pyspark>=3.2.0 to leverage pandas_on_Spark API * clean code, add TODOs * add sample_train_data for pyspark.pandas dataframe, fix bugs * improve some functions, fix bugs * fix dict change size during iteration * update model predict * update LightGBM model, update test * update SynapseML LightGBM params * update synapseML and tests * update TODOs * Added support to roc_auc for spark models * Added support to score of spark estimator * Added test for automl score of spark estimator * Added cv support to pyspark.pandas dataframe * Update test, fix bugs * Added tests * Updated docs, tests, added a notebook * Fix bugs in non-spark env * Fix bugs and improve tests * Fix uninstall pyspark * Fix tests error * Fix java.lang.OutOfMemoryError: Java heap space * Fix test_performance * Update test_sparkml to test_0sparkml to use the expected spark conf * Remove unnecessary widgets in notebook * Fix iloc java.lang.StackOverflowError * fix pre-commit * Added params check for spark dataframes * Refactor code for train_test_split to a function * Update train_test_split_pyspark * Refactor if-else, remove unnecessary code * Remove y from predict, remove mem control from n_iter compute * Update workflow * Improve _split_pyspark * Fix test failure of too short training time * Fix typos, improve docstrings * Fix index errors of pandas_on_spark, add spark loss metric * Fix typo of ndcgAtK * Update NDCG metrics and tests * Remove unuseful logger * Use cache and count to ensure consistent indexes * refactor for merge maain * fix errors of refactor * Updated SparkLightGBMEstimator and cache * Updated config2params * Remove unused import * Fix unknown parameters * Update default_estimator_list * Add unit tests for spark metrics
98 lines
2.1 KiB
Python
98 lines
2.1 KiB
Python
# Parameter names common to every LightGBM estimator parameter list in this
# module; the classifier, regressor and ranker lists extend this one.
#
# NOTE: every entry must be followed by a comma. Python silently concatenates
# adjacent string literals, so a missing comma fuses two names into a single
# bogus entry (e.g. "extraSeed" "featureFraction" -> "extraSeedfeatureFraction").
ParamList_LightGBM_Base = [
    "baggingFraction",
    "baggingFreq",
    "baggingSeed",
    "binSampleCount",
    "boostFromAverage",
    "boostingType",
    "catSmooth",
    "categoricalSlotIndexes",
    "categoricalSlotNames",
    "catl2",
    "chunkSize",
    "dataRandomSeed",
    "defaultListenPort",
    "deterministic",
    "driverListenPort",
    "dropRate",
    "dropSeed",
    "earlyStoppingRound",
    "executionMode",
    "extraSeed",
    "featureFraction",
    "featureFractionByNode",
    "featureFractionSeed",
    "featuresCol",
    "featuresShapCol",
    "fobj",
    "improvementTolerance",
    "initScoreCol",
    "isEnableSparse",
    "isProvideTrainingMetric",
    "labelCol",
    "lambdaL1",
    "lambdaL2",
    "leafPredictionCol",
    "learningRate",
    "matrixType",
    "maxBin",
    "maxBinByFeature",
    "maxCatThreshold",
    "maxCatToOnehot",
    "maxDeltaStep",
    "maxDepth",
    "maxDrop",
    "metric",
    "microBatchSize",
    "minDataInLeaf",
    "minDataPerBin",
    "minDataPerGroup",
    "minGainToSplit",
    "minSumHessianInLeaf",
    "modelString",
    "monotoneConstraints",
    "monotoneConstraintsMethod",
    "monotonePenalty",
    "negBaggingFraction",
    "numBatches",
    "numIterations",
    "numLeaves",
    "numTasks",
    "numThreads",
    "objectiveSeed",
    "otherRate",
    "parallelism",
    "passThroughArgs",
    "posBaggingFraction",
    "predictDisableShapeCheck",
    "predictionCol",
    "repartitionByGroupingColumn",
    "seed",
    "skipDrop",
    "slotNames",
    "timeout",
    "topK",
    "topRate",
    "uniformDrop",
    "useBarrierExecutionMode",
    "useMissing",
    "useSingleDatasetMode",
    "validationIndicatorCol",
    "verbosity",
    "weightCol",
    "xGBoostDartMode",
    "zeroAsMissing",
    "objective",
]
|
|
# Classifier parameter names: every shared base name, followed by the four
# names that this list adds on top of ParamList_LightGBM_Base.
ParamList_LightGBM_Classifier = [
    *ParamList_LightGBM_Base,
    "isUnbalance",
    "probabilityCol",
    "rawPredictionCol",
    "thresholds",
]
|
|
# Regressor parameter names: every shared base name plus the single extra
# name this list adds.
ParamList_LightGBM_Regressor = [
    *ParamList_LightGBM_Base,
    "tweedieVariancePower",
]
|
|
# Ranker parameter names: every shared base name, followed by the four names
# that this list adds on top of ParamList_LightGBM_Base.
ParamList_LightGBM_Ranker = [
    *ParamList_LightGBM_Base,
    "groupCol",
    "evalAt",
    "labelGain",
    "maxPosition",
]
|