2020-12-04 09:40:27 -08:00
|
|
|
import setuptools
|
|
|
|
import os
|
|
|
|
|
|
|
|
# Absolute path of the directory that contains this setup script.
here = os.path.abspath(os.path.dirname(__file__))

# Read the README relative to this file (not the current working directory)
# so the build also works when setup.py is invoked from another directory,
# matching how flaml/version.py is located below.
with open(os.path.join(here, "README.md"), "r", encoding="UTF-8") as fh:
    long_description = fh.read()
|
|
|
|
|
|
|
|
|
|
|
|
# Load the package version by executing flaml/version.py into a scratch
# namespace, then pull ``__version__`` out of it.
version = {}
version_path = os.path.join(here, "flaml/version.py")
with open(version_path) as fp:
    version_source = fp.read()
exec(version_source, version)
__version__ = version["__version__"]
|
|
|
|
|
|
|
|
# Hard runtime requirements; everything else is exposed through extras.
install_requires = ["NumPy>=1.17.0rc1"]
|
2020-12-04 09:40:27 -08:00
|
|
|
|
|
|
|
|
|
|
|
# Package metadata and optional dependency groups ("extras") for FLAML.
setuptools.setup(
    # --- Identity and description ---
    name="FLAML",
    version=__version__,
    author="Microsoft Corporation",
    author_email="hpo@microsoft.com",
    description="A fast library for automated machine learning and tuning",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/microsoft/FLAML",
    # --- Package contents ---
    packages=setuptools.find_packages(include=["flaml*"]),
    package_data={"flaml.default": ["*/*.json"]},
    include_package_data=True,
    # --- Dependencies ---
    install_requires=install_requires,
    extras_require={
        "automl": [
            "lightgbm>=2.3.1",
            "xgboost>=0.90",
            "scipy>=1.4.1",
            "pandas>=1.1.4",
            "scikit-learn>=0.24",
        ],
        "notebook": ["jupyter"],
        "spark": ["pyspark>=3.2.0", "joblibspark>=0.5.0"],
        # Requirements for running the test suite; includes entries that
        # also appear in other extras defined in this mapping.
        "test": [
            "lightgbm>=2.3.1",
            "xgboost>=0.90",
            "scipy>=1.4.1",
            "pandas>=1.1.4",
            "scikit-learn>=0.24",
            "thop",
            "pytest>=6.1.1",
            "coverage>=5.3",
            "pre-commit",
            "torch",
            "torchvision",
            "catboost>=0.26,<1.2",
            "rgf-python",
            "optuna==2.8.0",
            "openml",
            "statsmodels>=0.12.2",
            "psutil==5.8.0",
            "dataclasses",
            "transformers[torch]==4.26",
            "datasets",
            "nltk",
            "rouge_score",
            "hcrystalball==0.1.10",
            "seqeval",
            "pytorch-forecasting>=0.9.0,<=0.10.1",
            "mlflow",
            "pyspark>=3.2.0",
            "joblibspark>=0.5.0",
            "nbconvert",
            "nbformat",
            "ipykernel",
            "pytorch-lightning<1.9.1",  # test_forecast_panel
            "requests<2.29.0",  # https://github.com/docker/docker-py/issues/3113
            "packaging",
        ],
        "catboost": ["catboost>=0.26"],
        "blendsearch": ["optuna==2.8.0"],
        "ray": ["ray[tune]~=1.13"],
        "azureml": ["azureml-mlflow"],
        "nni": ["nni"],
        "vw": ["vowpalwabbit>=8.10.0, <9.0.0", "scikit-learn"],
        "hf": [
            "transformers[torch]==4.26",
            "datasets",
            "nltk",
            "rouge_score",
            "seqeval",
        ],
        # for backward compatibility; hf is the new option name
        "nlp": [
            "transformers[torch]==4.26",
            "datasets",
            "nltk",
            "rouge_score",
            "seqeval",
        ],
        "ts_forecast": [
            "holidays<0.14",  # to prevent installation error for prophet
            "prophet>=1.0.1",
            "statsmodels>=0.12.2",
            "hcrystalball==0.1.10",
        ],
        "forecast": [
            "holidays<0.14",  # to prevent installation error for prophet
            "prophet>=1.0.1",
            "statsmodels>=0.12.2",
            "hcrystalball==0.1.10",
            "pytorch-forecasting>=0.9.0",
        ],
        "benchmark": ["catboost>=0.26", "psutil==5.8.0", "xgboost==1.3.3", "pandas==1.1.4"],
        "openai": ["openai==0.27.4", "diskcache"],
        "autogen": ["openai==0.27.4", "diskcache", "docker"],
        "synapse": ["joblibspark>=0.5.0", "optuna==2.8.0", "pyspark>=3.2.0"],
        "autozero": ["scikit-learn", "pandas", "packaging"],
    },
    # --- Trove classifiers and supported interpreters ---
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    python_requires=">=3.6",
)
|