"FLAML is a Python library (https://github.com/microsoft/FLAML) designed to automatically produce accurate machine learning models \n",
"with low computational cost. It is fast and economical. The simple and lightweight design makes it easy to use and extend, such as adding new learners. FLAML can \n",
"- serve as an economical AutoML engine,\n",
"- be used as a fast hyperparameter tuning tool, or \n",
"- be embedded in self-tuning software that requires low latency & resource in repetitive\n",
" tuning tasks.\n",
"\n",
"In this notebook, we demonstrate how to use the FLAML library to fine tune an NLP language model with hyperparameter search. We will use [flaml.tune](https://microsoft.github.io/FLAML/docs/Use-Cases/Tune-User-Defined-Function) with the built in GPU in colab for the tuning. However, if you have a machine with more than 1 GPU, you can also use FLAML's [parallel tuning](https://microsoft.github.io/FLAML/docs/Use-Cases/Task-Oriented-AutoML#parallel-tuning) with the ray tune option. \n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m112.7/112.7 kB\u001b[0m \u001b[31m18.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: ptyprocess in /usr/local/lib/python3.9/dist-packages (from terminado>=0.8.3->notebook->jupyter->flaml[blendsearch,notebook,ray]) (0.7.0)\n",
"Requirement already satisfied: argon2-cffi-bindings in /usr/local/lib/python3.9/dist-packages (from argon2-cffi->notebook->jupyter->flaml[blendsearch,notebook,ray]) (21.2.0)\n",
"Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.9/dist-packages (from beautifulsoup4->nbconvert->jupyter->flaml[blendsearch,notebook,ray]) (2.4)\n",
"Requirement already satisfied: webencodings in /usr/local/lib/python3.9/dist-packages (from bleach->nbconvert->jupyter->flaml[blendsearch,notebook,ray]) (0.5.1)\n",
"Requirement already satisfied: parso<0.9.0,>=0.8.0 in /usr/local/lib/python3.9/dist-packages (from jedi>=0.16->ipython>=5.0.0->ipykernel->jupyter->flaml[blendsearch,notebook,ray]) (0.8.3)\n",
"Requirement already satisfied: cffi>=1.0.1 in /usr/local/lib/python3.9/dist-packages (from argon2-cffi-bindings->argon2-cffi->notebook->jupyter->flaml[blendsearch,notebook,ray]) (1.15.1)\n",
"Requirement already satisfied: pycparser in /usr/local/lib/python3.9/dist-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi->notebook->jupyter->flaml[blendsearch,notebook,ray]) (2.21)\n",
"Building wheels for collected packages: openml, liac-arff, pyperclip\n",
" Building wheel for openml (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for openml: filename=openml-0.10.2-py3-none-any.whl size=190321 sha256=6384a6a98dcf21a054e2457f2a12e83e7f09122e873ed8dab894d7a4649b869b\n",
" Stored in directory: /root/.cache/pip/wheels/90/70/b9/37e0bd30dd46291f37d970e2032d557d7eb36b6ccabe47419c\n",
" Building wheel for liac-arff (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for liac-arff: filename=liac_arff-2.5.0-py3-none-any.whl size=11732 sha256=45f0543f0ec70558329ca4338de37f0feb6b093e730eed20921f38040916fbf3\n",
" Stored in directory: /root/.cache/pip/wheels/08/82/8b/5c514221984e88c059b94e36a71d4722e590acaae04deab22e\n",
" Building wheel for pyperclip (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for pyperclip: filename=pyperclip-1.8.2-py3-none-any.whl size=11135 sha256=b59846b5e39f6f668d74e06e57b7ceaded7c46beffc70dc391b71c02c6425afb\n",
" Stored in directory: /root/.cache/pip/wheels/0c/09/9e/49e21a6840ef7955b06d47394afef0058f0378c0914e48b8b8\n",
"Successfully built openml liac-arff pyperclip\n",
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
"tensorboard 2.12.1 requires grpcio>=1.48.2, but you have grpcio 1.43.0 which is incompatible.\n",
"grpcio-status 1.48.2 requires grpcio>=1.48.2, but you have grpcio 1.43.0 which is incompatible.\n",
"google-cloud-bigquery 3.9.0 requires grpcio<2.0dev,>=1.47.0, but you have grpcio 1.43.0 which is incompatible.\u001b[0m\u001b[31m\n",
"Requirement already satisfied: absl-py in /usr/local/lib/python3.9/dist-packages (from rouge_score) (1.4.0)\n",
"Requirement already satisfied: nltk in /usr/local/lib/python3.9/dist-packages (from rouge_score) (3.8.1)\n",
"Requirement already satisfied: numpy in /usr/local/lib/python3.9/dist-packages (from rouge_score) (1.22.4)\n",
"Requirement already satisfied: six>=1.14.0 in /usr/local/lib/python3.9/dist-packages (from rouge_score) (1.16.0)\n",
"Requirement already satisfied: click in /usr/local/lib/python3.9/dist-packages (from nltk->rouge_score) (8.0.4)\n",
"Requirement already satisfied: tqdm in /usr/local/lib/python3.9/dist-packages (from nltk->rouge_score) (4.65.0)\n",
"Requirement already satisfied: joblib in /usr/local/lib/python3.9/dist-packages (from nltk->rouge_score) (1.2.0)\n",
"Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.9/dist-packages (from nltk->rouge_score) (2022.10.31)\n",
"Building wheels for collected packages: rouge_score\n",
" Building wheel for rouge_score (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24954 sha256=4032d06ff03906dbf10b9d7bae49035b4d76498d3b86b286e1472939d2ee09b0\n",
" Stored in directory: /root/.cache/pip/wheels/9b/3d/39/09558097d3119ca0a4d462df68f22c6f3c1b345ac63a09b86e\n",
"Let's run some examples. To use CoLab's built in GPU, you need to select Runtime -> Change runtime type and select GPU. Then you can print the device information using:"
"Downloading and preparing dataset glue/sst2 to /root/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad...\n"
"Generating test split: 0%| | 0/1821 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset glue downloaded and prepared to /root/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad. Subsequent calls will reuse this data.\n"
"Let's run FLAML for 30 mins. Here we use Electra's [small model](https://huggingface.co/google/electra-small-discriminator) for the tuning. We set gpu_per_trial to 1, and n_concurrent_trials to 1 (the number of trials running at the same time). Make sure gpu_per_trial * n_concurrent_trials does not exceed the GPU number you have. While running you can observe the resource usage (including the GPU) on the right. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "QEvR2bZiTjhG"
},
"outputs": [],
"source": [
"MAX_ITER=20\n",
"automl_settings = {\n",
" \"max_iter\": MAX_ITER, # setting the time budget\n",
" \"task\": \"seq-classification\", # setting the task as seq-classification\n",
" \"fit_kwargs_by_estimator\": {\n",
" \"transformer\": {\n",
" \"output_dir\": \"data/output/\", # setting the output directory\n",
" \"model_path\": \"google/electra-small-discriminator\", # if model_path is not set, the default model is facebook/muppet-roberta-base: https://huggingface.co/facebook/muppet-roberta-base\n",
" }\n",
" },\n",
" \"gpu_per_trial\": 1, # using 1 GPU for each trial\n",
" \"log_file_name\": \"seqclass.log\", # set the file to save the log for HPO\n",
" \"log_type\": \"all\", # the log type for trials: \"all\" if logging all the trials, \"better\" if only keeping the better trials\n",
" \"use_ray\": False, # If parallel tuning, set \"use_ray\" to {\"local_dir\": \"data/output/\"}\n",
" \"n_concurrent_trials\": 1, # How many trials to run at the same time, n_concurrent_trials * gpu_per_trial must not exceed the total number of GPUs\n",
" \"keep_search_state\": True, # keeping the search state\n",
" # \"fp16\": False # whether to use fp16, this option is True by default. \n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 02:53:14] {2479} INFO - Estimated sufficient time budget=67349s. Estimated necessary time budget=67s.\n",
"[flaml.automl.logger: 04-12 02:53:14] {2526} INFO - at 127.6s,\testimator transformer's best error=0.1628,\tbest estimator transformer's best error=0.1628\n",
"[flaml.automl.logger: 04-12 02:53:14] {2341} INFO - iteration 1, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 02:54:43] {2526} INFO - at 217.2s,\testimator transformer's best error=0.1628,\tbest estimator transformer's best error=0.1628\n",
"[flaml.automl.logger: 04-12 02:54:43] {2341} INFO - iteration 2, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 02:57:20] {2526} INFO - at 373.7s,\testimator transformer's best error=0.1514,\tbest estimator transformer's best error=0.1514\n",
"[flaml.automl.logger: 04-12 02:57:20] {2341} INFO - iteration 3, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 02:59:19] {2526} INFO - at 493.1s,\testimator transformer's best error=0.1468,\tbest estimator transformer's best error=0.1468\n",
"[flaml.automl.logger: 04-12 02:59:19] {2341} INFO - iteration 4, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 03:01:16] {2526} INFO - at 610.1s,\testimator transformer's best error=0.1422,\tbest estimator transformer's best error=0.1422\n",
"[flaml.automl.logger: 04-12 03:01:16] {2341} INFO - iteration 5, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 03:03:13] {2526} INFO - at 726.6s,\testimator transformer's best error=0.1422,\tbest estimator transformer's best error=0.1422\n",
"[flaml.automl.logger: 04-12 03:03:13] {2341} INFO - iteration 6, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 03:05:13] {2526} INFO - at 846.5s,\testimator transformer's best error=0.1250,\tbest estimator transformer's best error=0.1250\n",
"[flaml.automl.logger: 04-12 03:05:13] {2341} INFO - iteration 7, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 03:07:15] {2526} INFO - at 969.1s,\testimator transformer's best error=0.1250,\tbest estimator transformer's best error=0.1250\n",
"[flaml.automl.logger: 04-12 03:07:15] {2341} INFO - iteration 8, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 03:10:32] {2526} INFO - at 1166.1s,\testimator transformer's best error=0.1250,\tbest estimator transformer's best error=0.1250\n",
"[flaml.automl.logger: 04-12 03:10:32] {2341} INFO - iteration 9, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 03:12:00] {2526} INFO - at 1253.6s,\testimator transformer's best error=0.1250,\tbest estimator transformer's best error=0.1250\n",
"[flaml.automl.logger: 04-12 03:12:00] {2341} INFO - iteration 10, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 03:14:03] {2526} INFO - at 1376.9s,\testimator transformer's best error=0.1250,\tbest estimator transformer's best error=0.1250\n",
"[flaml.automl.logger: 04-12 03:14:03] {2341} INFO - iteration 11, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 03:23:26] {2526} INFO - at 1939.6s,\testimator transformer's best error=0.0998,\tbest estimator transformer's best error=0.0998\n",
"[flaml.automl.logger: 04-12 03:23:26] {2341} INFO - iteration 12, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 03:32:50] {2526} INFO - at 2503.4s,\testimator transformer's best error=0.0837,\tbest estimator transformer's best error=0.0837\n",
"[flaml.automl.logger: 04-12 03:32:50] {2341} INFO - iteration 13, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 03:38:27] {2526} INFO - at 2840.5s,\testimator transformer's best error=0.0837,\tbest estimator transformer's best error=0.0837\n",
"[flaml.automl.logger: 04-12 03:38:27] {2341} INFO - iteration 14, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 03:55:51] {2526} INFO - at 3884.8s,\testimator transformer's best error=0.0837,\tbest estimator transformer's best error=0.0837\n",
"[flaml.automl.logger: 04-12 03:55:51] {2341} INFO - iteration 15, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 04:00:45] {2526} INFO - at 4178.9s,\testimator transformer's best error=0.0837,\tbest estimator transformer's best error=0.0837\n",
"[flaml.automl.logger: 04-12 04:00:45] {2341} INFO - iteration 16, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 04:14:05] {2526} INFO - at 4978.9s,\testimator transformer's best error=0.0837,\tbest estimator transformer's best error=0.0837\n",
"[flaml.automl.logger: 04-12 04:14:05] {2341} INFO - iteration 17, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 04:27:21] {2526} INFO - at 5775.1s,\testimator transformer's best error=0.0837,\tbest estimator transformer's best error=0.0837\n",
"[flaml.automl.logger: 04-12 04:27:21] {2341} INFO - iteration 18, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 04:32:15] {2526} INFO - at 6068.8s,\testimator transformer's best error=0.0837,\tbest estimator transformer's best error=0.0837\n",
"[flaml.automl.logger: 04-12 04:32:15] {2341} INFO - iteration 19, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 04:41:42] {2526} INFO - at 6635.5s,\testimator transformer's best error=0.0837,\tbest estimator transformer's best error=0.0837\n",
"[flaml.automl.logger: 04-12 04:41:42] {2642} INFO - selected model: None\n",
"[flaml.automl.logger: 04-12 04:41:42] {2041} INFO - fit succeeded\n",
"[flaml.automl.logger: 04-12 04:41:42] {2042} INFO - Time taken to find the best model: 2503.373429775238\n"
"The run takes 2.5 hours. We can print the best trial's loss, which is 1-the accuracy. The accuracy we got is 90.9% which is close to 91.2% reported by [the Electra model github](https://github.com/google-research/electra). "
"print(\"The best loss by FLAML: {}\".format(1-automl.best_loss))"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "wcO2th5M6AIu"
},
"source": [
"If you have more GPUs on your server, you can use flaml.tune with the ray tune option, which will often give you a better score. For example, with 4x NVIDIA V100 GPU, the accuracy was 92.2% after searching for half an hour. For that experiment, you can open this notebook on your GPU server and set \"use_ray\" to {\"local_dir\": \"data/output/\"} and n_concurrent_trials to more than 1. "
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "QFP5JNdPTjhG"
},
"source": [
"### Best model and metric"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "mY07pTY_xlIV"
},
"source": [
"Next, we can print the best hyperparameter and the best score:"
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
"Given a dataset, which language model should you use for the fine tuning? It appears this is a simple question: just choose the best model according to the benchmarks such as [GLUE](https://gluebenchmark.com/leaderboard). However, we will see that under the resource constraints, the model selection is non trivial. \n",
"\n",
"In this example, we will tune the [spooky-author-identification](https://www.kaggle.com/competitions/spooky-author-identification/data?select=train.zip) dataset from kaggle. You can download the dataset from the [here](https://drive.google.com/file/d/1Jk-_Vg_SxOUDfFVzF7S85oBasY8fFvOY/view?usp=sharing) and upload it to Colab. The following command also downloads the file. We run FLAML for 30 mins using bert."
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 17:23:29] {2479} INFO - Estimated sufficient time budget=2440736s. Estimated necessary time budget=2441s.\n",
"[flaml.automl.logger: 04-12 17:23:29] {2526} INFO - at 244.1s,\testimator transformer's best error=0.3882,\tbest estimator transformer's best error=0.3882\n",
"[flaml.automl.logger: 04-12 17:23:29] {2341} INFO - iteration 1, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 17:27:17] {2526} INFO - at 472.5s,\testimator transformer's best error=0.3882,\tbest estimator transformer's best error=0.3882\n",
"[flaml.automl.logger: 04-12 17:27:17] {2341} INFO - iteration 2, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 17:30:57] {2526} INFO - at 691.8s,\testimator transformer's best error=0.3162,\tbest estimator transformer's best error=0.3162\n",
"[flaml.automl.logger: 04-12 17:30:57] {2341} INFO - iteration 3, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 17:34:34] {2526} INFO - at 908.9s,\testimator transformer's best error=0.2858,\tbest estimator transformer's best error=0.2858\n",
"[flaml.automl.logger: 04-12 17:34:34] {2341} INFO - iteration 4, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 17:40:55] {2526} INFO - at 1289.8s,\testimator transformer's best error=0.1826,\tbest estimator transformer's best error=0.1826\n",
"[flaml.automl.logger: 04-12 17:40:55] {2341} INFO - iteration 5, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 17:47:16] {2526} INFO - at 1671.6s,\testimator transformer's best error=0.1542,\tbest estimator transformer's best error=0.1542\n",
"[flaml.automl.logger: 04-12 17:47:16] {2642} INFO - selected model: None\n",
"[flaml.automl.logger: 04-12 17:47:16] {2041} INFO - fit succeeded\n",
"[flaml.automl.logger: 04-12 17:47:16] {2042} INFO - Time taken to find the best model: 1671.5927600860596\n",
"[flaml.automl.logger: 04-12 17:47:16] {2054} WARNING - Time taken to find the best model is 93% of the provided time budget and not all estimators' hyperparameter search converged. Consider increasing the time budget.\n"
"The job ran for 23m and searched for 4 trials. This time is shorter than our budget 30m because FLAML early stops the last trial which will run for too long. If you want to run for longer time, set a larger time budget. "
"the best loss for spooky author identification: 0.1542390194075587\n"
]
}
],
"source": [
"print(\"the best loss for spooky author identification: {}\".format(automl_model.best_loss))"
]
},
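{
"cell_type": "markdown",
"metadata": {},
"source": [
"Beyond `best_loss`, the fitted `AutoML` object also records which trial won, which is useful when deciding whether a larger budget is worthwhile. A minimal sketch, assuming `automl_model` is the object fitted above and `X_val` is the validation split prepared in the earlier cells:\n",
"\n",
"```python\n",
"print(automl_model.best_estimator)    # name of the best learner, e.g. \"transformer\"\n",
"print(automl_model.best_config)       # hyperparameters of the best trial\n",
"print(automl_model.best_loss)         # best validation loss found within the budget\n",
"\n",
"y_pred = automl_model.predict(X_val)  # predictions from the best checkpoint\n",
"```"
]
},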
{
"cell_type": "markdown",
"metadata": {
"id": "TzDjaBTA6ZaD"
},
"source": [
"Next, we set the model to roberta and run again. RoBERTa outperforms BERT by 15% on the [SuperGLUE](https://super.gluebenchmark.com/) benchmark, as well as [GLUE](https://gluebenchmark.com/), [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/), [RACE](https://www.cs.cmu.edu/~glai1/data/race/), etc. Does this mean we should always use RoBERTa and never use BERT? To answer this question, we run the same experiment again with RoBERTa:"
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 17:52:30] {2479} INFO - Estimated sufficient time budget=2324423s. Estimated necessary time budget=2324s.\n",
"[flaml.automl.logger: 04-12 17:52:30] {2526} INFO - at 232.6s,\testimator transformer's best error=0.3481,\tbest estimator transformer's best error=0.3481\n",
"[flaml.automl.logger: 04-12 17:52:30] {2341} INFO - iteration 1, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 17:56:12] {2526} INFO - at 454.0s,\testimator transformer's best error=0.3481,\tbest estimator transformer's best error=0.3481\n",
"[flaml.automl.logger: 04-12 17:56:12] {2341} INFO - iteration 2, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 17:59:50] {2526} INFO - at 672.5s,\testimator transformer's best error=0.2646,\tbest estimator transformer's best error=0.2646\n",
"[flaml.automl.logger: 04-12 17:59:50] {2341} INFO - iteration 3, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 18:03:31] {2526} INFO - at 893.5s,\testimator transformer's best error=0.2415,\tbest estimator transformer's best error=0.2415\n",
"[flaml.automl.logger: 04-12 18:03:31] {2341} INFO - iteration 4, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 18:09:59] {2526} INFO - at 1280.8s,\testimator transformer's best error=0.1928,\tbest estimator transformer's best error=0.1928\n",
"[flaml.automl.logger: 04-12 18:09:59] {2341} INFO - iteration 5, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 18:16:30] {2526} INFO - at 1672.1s,\testimator transformer's best error=0.1757,\tbest estimator transformer's best error=0.1757\n",
"[flaml.automl.logger: 04-12 18:16:30] {2642} INFO - selected model: None\n",
"[flaml.automl.logger: 04-12 18:16:30] {2041} INFO - fit succeeded\n",
"[flaml.automl.logger: 04-12 18:16:30] {2042} INFO - Time taken to find the best model: 1672.051875114441\n",
"[flaml.automl.logger: 04-12 18:16:30] {2054} WARNING - Time taken to find the best model is 93% of the provided time budget and not all estimators' hyperparameter search converged. Consider increasing the time budget.\n"
"[flaml.automl.logger: 04-12 18:18:23] {1768} INFO - task = seq-classification\n",
"[flaml.automl.logger: 04-12 18:18:23] {1775} INFO - Data split method: stratified\n",
"[flaml.automl.logger: 04-12 18:18:23] {1778} INFO - Evaluation method: holdout\n",
"[flaml.automl.logger: 04-12 18:18:23] {1891} INFO - Minimizing error metric: 1-accuracy\n",
"[flaml.automl.logger: 04-12 18:18:23] {2011} INFO - List of ML learners in AutoML Run: ['transformer_ms']\n",
"[flaml.automl.logger: 04-12 18:18:23] {2341} INFO - iteration 0, current learner transformer_ms\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 18:22:02] {2479} INFO - Estimated sufficient time budget=2181390s. Estimated necessary time budget=2181s.\n",
"[flaml.automl.logger: 04-12 18:22:02] {2526} INFO - at 218.2s,\testimator transformer_ms's best error=0.3882,\tbest estimator transformer_ms's best error=0.3882\n",
"[flaml.automl.logger: 04-12 18:22:02] {2341} INFO - iteration 1, current learner transformer_ms\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
"[flaml.automl.logger: 04-12 18:25:43] {2526} INFO - at 439.0s,\testimator transformer_ms's best error=0.3882,\tbest estimator transformer_ms's best error=0.3882\n",
"[flaml.automl.logger: 04-12 18:25:43] {2341} INFO - iteration 2, current learner transformer_ms\n"
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
"[flaml.automl.logger: 04-12 18:29:27] {2526} INFO - at 663.8s,\testimator transformer_ms's best error=0.2660,\tbest estimator transformer_ms's best error=0.2660\n",
"[flaml.automl.logger: 04-12 18:29:27] {2341} INFO - iteration 3, current learner transformer_ms\n"
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
"[flaml.automl.logger: 04-12 18:33:03] {2526} INFO - at 879.9s,\testimator transformer_ms's best error=0.2660,\tbest estimator transformer_ms's best error=0.2660\n",
"[flaml.automl.logger: 04-12 18:33:03] {2341} INFO - iteration 4, current learner transformer_ms\n"
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
"[flaml.automl.logger: 04-12 18:36:49] {2526} INFO - at 1105.6s,\testimator transformer_ms's best error=0.2163,\tbest estimator transformer_ms's best error=0.2163\n",
"[flaml.automl.logger: 04-12 18:36:49] {2341} INFO - iteration 5, current learner transformer_ms\n"
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
"[flaml.automl.logger: 04-12 18:43:17] {2526} INFO - at 1493.3s,\testimator transformer_ms's best error=0.1818,\tbest estimator transformer_ms's best error=0.1818\n",
"[flaml.automl.logger: 04-12 18:43:17] {2341} INFO - iteration 6, current learner transformer_ms\n"
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
"[flaml.automl.logger: 04-12 18:46:54] {2526} INFO - at 1710.6s,\testimator transformer_ms's best error=0.1818,\tbest estimator transformer_ms's best error=0.1818\n",
"[flaml.automl.logger: 04-12 18:46:54] {2341} INFO - iteration 7, current learner transformer_ms\n"
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 18:53:22] {2526} INFO - at 2098.8s,\testimator transformer_ms's best error=0.1818,\tbest estimator transformer_ms's best error=0.1818\n",
"[flaml.automl.logger: 04-12 18:53:22] {2341} INFO - iteration 8, current learner transformer_ms\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 18:59:46] {2526} INFO - at 2482.5s,\testimator transformer_ms's best error=0.1596,\tbest estimator transformer_ms's best error=0.1596\n",
"[flaml.automl.logger: 04-12 18:59:46] {2341} INFO - iteration 9, current learner transformer_ms\n"
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 19:06:14] {2526} INFO - at 2870.2s,\testimator transformer_ms's best error=0.1596,\tbest estimator transformer_ms's best error=0.1596\n",
"[flaml.automl.logger: 04-12 19:06:14] {2341} INFO - iteration 10, current learner transformer_ms\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 19:12:35] {2526} INFO - at 3252.0s,\testimator transformer_ms's best error=0.1491,\tbest estimator transformer_ms's best error=0.1491\n",
"[flaml.automl.logger: 04-12 19:12:35] {2642} INFO - selected model: None\n",
"[flaml.automl.logger: 04-12 19:12:35] {2041} INFO - fit succeeded\n",
"[flaml.automl.logger: 04-12 19:12:35] {2042} INFO - Time taken to find the best model: 3251.999900817871\n",
"[flaml.automl.logger: 04-12 19:12:35] {2054} WARNING - Time taken to find the best model is 90% of the provided time budget and not all estimators' hyperparameter search converged. Consider increasing the time budget.\n"
"We plot the performance of BERT, RoBERTa, and model selection w.r.t. the wall clock time. We find two things: \n",
"\n",
"(1) although RoBERTa frequently outperforms BERT on benchmark datasets, its performance on the spooky-author-identification dataset is worse than BERT using the same time budget. Therefore, model selection is a non trivial problem;\n",
"\n",
"(2) by using FLAML's automated model selection, we are able to achieve a better performance than using just one model. Therefore, automated model selection is helpful;"
"Besides sequence classification, FLAML currently also supports four other tasks (more tasks are to be supported, which can be found on [FLAML's documentation website](https://microsoft.github.io/FLAML/docs/Examples/AutoML-NLP)):\n",
"\n",
"- sequence regression: predicting a float number from the input sequence, e.g., predicting the rating of a hotel review based on the text content;\n",
"- token classification: predicting the label of each token in a sequence, e.g., named entity recognition;\n",
"- multiple choice: predicting the best second half of a sentence that comes next to the first part of a sentence based on common sensen reasoning. An example is seen below;\n",
"- (abstractive) summarization: generating the textual summarization of an input paragraph;\n",
"\n",
"Here we look into two tasks: multiple choice classification and text summarization. These tasks require significant computational resources, therefore instead of Colab, we run them using 4 NVIDIA V100 GPUs and Ray Tune on our server."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Y4VgUR5TTjhJ"
},
"source": [
"### 4.1 Multiple Choice Example"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "OO8GqaH3TjhJ"
},
"source": [
"Multiple choice is a task of predicting the best second half of a sentence that follows the first half based on common sense reasoning. An example of multiple-choice classification problem is:\n",
"\n",
"On stage, a woman takes a seat at the piano. She\n",
"a) sits on a bench as her sister plays with the doll.\n",
"WARNING:datasets.builder:No config specified, defaulting to: swag/regular\n"
]
},
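{
"cell_type": "markdown",
"metadata": {},
"source": [
"FLAML's multiple choice task expects the candidate endings as columns of a pandas DataFrame. A minimal data-preparation sketch, assuming we use the SWAG dataset from Hugging Face (as in the output below) with its standard column names; the subsampling is only to keep the run cheap:\n",
"\n",
"```python\n",
"from datasets import load_dataset\n",
"\n",
"train_dataset = load_dataset(\"swag\", \"regular\", split=\"train\").to_pandas().iloc[:10000]\n",
"dev_dataset = load_dataset(\"swag\", \"regular\", split=\"validation\").to_pandas().iloc[:2000]\n",
"\n",
"# the first sentence, the start of the second sentence, and the four candidate endings\n",
"custom_sent_keys = [\"sent1\", \"sent2\", \"ending0\", \"ending1\", \"ending2\", \"ending3\"]\n",
"label_key = \"label\"  # index (0-3) of the correct ending\n",
"\n",
"X_train, y_train = train_dataset[custom_sent_keys], train_dataset[label_key]\n",
"X_val, y_val = dev_dataset[custom_sent_keys], dev_dataset[label_key]\n",
"```"
]
},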
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading and preparing dataset swag/regular to /root/.cache/huggingface/datasets/swag/regular/0.0.0/9640de08cdba6a1469ed3834fcab4b8ad8e38caf5d1ba5e7436d8b1fd067ad4c...\n"
"Dataset swag downloaded and prepared to /root/.cache/huggingface/datasets/swag/regular/0.0.0/9640de08cdba6a1469ed3834fcab4b8ad8e38caf5d1ba5e7436d8b1fd067ad4c. Subsequent calls will reuse this data.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:datasets.builder:No config specified, defaulting to: swag/regular\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"[flaml.automl.logger: 04-12 15:25:50] {2479} INFO - Estimated sufficient time budget=3129882s. Estimated necessary time budget=3130s.\n",
"[flaml.automl.logger: 04-12 15:25:50] {2526} INFO - at 313.1s,\testimator transformer's best error=0.2910,\tbest estimator transformer's best error=0.2910\n",
"[flaml.automl.logger: 04-12 15:25:50] {2341} INFO - iteration 1, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"[flaml.automl.logger: 04-12 15:30:44] {2526} INFO - at 607.4s,\testimator transformer's best error=0.2910,\tbest estimator transformer's best error=0.2910\n",
"[flaml.automl.logger: 04-12 15:30:44] {2341} INFO - iteration 2, current learner transformer\n"
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"[flaml.automl.logger: 04-12 15:35:38] {2526} INFO - at 901.2s,\testimator transformer's best error=0.2850,\tbest estimator transformer's best error=0.2850\n",
"[flaml.automl.logger: 04-12 15:35:38] {2341} INFO - iteration 3, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"[flaml.automl.logger: 04-12 15:39:15] {2526} INFO - at 1117.8s,\testimator transformer's best error=0.2850,\tbest estimator transformer's best error=0.2850\n",
"[flaml.automl.logger: 04-12 15:39:15] {2341} INFO - iteration 4, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"[flaml.automl.logger: 04-12 15:45:25] {2526} INFO - at 1487.7s,\testimator transformer's best error=0.2850,\tbest estimator transformer's best error=0.2850\n",
"[flaml.automl.logger: 04-12 15:45:25] {2341} INFO - iteration 5, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
"[flaml.automl.logger: 04-12 15:50:19] {2526} INFO - at 1781.7s,\testimator transformer's best error=0.2850,\tbest estimator transformer's best error=0.2850\n",
"[flaml.automl.logger: 04-12 15:50:19] {2642} INFO - selected model: None\n",
"[flaml.automl.logger: 04-12 15:50:19] {2041} INFO - fit succeeded\n",
"[flaml.automl.logger: 04-12 15:50:19] {2042} INFO - Time taken to find the best model: 901.1799373626709\n"
]
}
],
"source": [
"''' import AutoML class from flaml package '''\n",
"from flaml import AutoML\n",
"automl = AutoML()\n",
"\n",
"automl_settings = {\n",
" \"time_budget\": 1800, # setting the time budget\n",
" \"task\": \"multichoice-classification\", # setting the task as multiplechoice-classification\n",
" \"fit_kwargs_by_estimator\": { # if model_path is not set, the default model is facebook/muppet-roberta-base: https://huggingface.co/facebook/muppet-roberta-base\n",
" \"transformer\": {\n",
" \"output_dir\": \"data/output/\", # setting the output directory\n",
" \"model_path\": \"bert-base-uncased\", # the batch size for validation (inference)\n",
" }\n",
" },\n",
" \"gpu_per_trial\": 1, # set to 0 if no GPU is available\n",
" \"log_file_name\": \"seqclass.log\", # set the file to save the log for HPO\n",
" \"log_type\": \"all\", # the log type for trials: \"all\" if logging all the trials, \"better\" if only keeping the better trials\n",
" \"use_ray\": False, # set whether to use Ray\n",
"The text summarization task summarizes a long text into a short sentence. For example:\n",
"\n",
"- Document: Army explosives experts were called out to deal with a suspect package at the offices on the Newtownards Road on Friday night. Roads were sealed off and traffic diverted as a controlled explosion was carried out. The premises, used by East Belfast MP Naomi Long, have been targeted a number of times. Most recently, petrol bomb attacks were carried out on the offices on consecutive nights in April and May. The attacks began following a Belfast City Council vote in December 2012 restricting the flying of the union flag at the City Hall. Condemning the latest hoax, Alliance MLA Chris Lyttle said: \"It is a serious incident for the local area, it causes serious disruption, it puts people's lives at risk, it can prevent emergency services reaching the area. \"Ultimately we need people with information to share that with the police in order for them to do their job and bring these people to justice.\n",
"\n",
"- Summary: A suspicious package left outside an Alliance Party office in east Belfast has been declared a hoax.\n",
"\n",
"In this example, we use FLAML to perform *abstractive summarization* using the t5-small language model, i.e., the summary is generated word-by-word. "
"Downloading and preparing dataset xsum/default to /root/.cache/huggingface/datasets/xsum/default/1.2.0/082863bf4754ee058a5b6f6525d0cb2b18eadb62c7b370b095d1364050a52b71...\n"
"Dataset xsum downloaded and prepared to /root/.cache/huggingface/datasets/xsum/default/1.2.0/082863bf4754ee058a5b6f6525d0cb2b18eadb62c7b370b095d1364050a52b71. Subsequent calls will reuse this data.\n"
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n",
"[nltk_data] Downloading package punkt to /root/nltk_data...\n",
"[nltk_data] Unzipping tokenizers/punkt.zip.\n",
"/usr/local/lib/python3.9/dist-packages/flaml/automl/ml.py:209: FutureWarning: load_metric is deprecated and will be removed in the next major version of datasets. Use 'evaluate.load' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate\n",
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"[nltk_data] Downloading package punkt to /root/nltk_data...\n",
"[nltk_data] Package punkt is already up-to-date!\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 15:55:25] {2479} INFO - Estimated sufficient time budget=1915790s. Estimated necessary time budget=1916s.\n",
"[flaml.automl.logger: 04-12 15:55:25] {2526} INFO - at 191.6s,\testimator transformer's best error=0.8610,\tbest estimator transformer's best error=0.8610\n",
"[flaml.automl.logger: 04-12 15:55:25] {2341} INFO - iteration 1, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n",
"[nltk_data] Downloading package punkt to /root/nltk_data...\n",
"[nltk_data] Package punkt is already up-to-date!\n"
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"[nltk_data] Downloading package punkt to /root/nltk_data...\n",
"[nltk_data] Package punkt is already up-to-date!\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 15:58:24] {2526} INFO - at 370.6s,\testimator transformer's best error=0.8610,\tbest estimator transformer's best error=0.8610\n",
"[flaml.automl.logger: 04-12 15:58:24] {2341} INFO - iteration 2, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n",
"[nltk_data] Downloading package punkt to /root/nltk_data...\n",
"[nltk_data] Package punkt is already up-to-date!\n"
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"[nltk_data] Downloading package punkt to /root/nltk_data...\n",
"[nltk_data] Package punkt is already up-to-date!\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 16:01:21] {2526} INFO - at 547.4s,\testimator transformer's best error=0.8610,\tbest estimator transformer's best error=0.8610\n",
"[flaml.automl.logger: 04-12 16:01:21] {2341} INFO - iteration 3, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n",
"[nltk_data] Downloading package punkt to /root/nltk_data...\n",
"[nltk_data] Package punkt is already up-to-date!\n"
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"[nltk_data] Downloading package punkt to /root/nltk_data...\n",
"[nltk_data] Package punkt is already up-to-date!\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 16:04:17] {2526} INFO - at 723.4s,\testimator transformer's best error=0.8609,\tbest estimator transformer's best error=0.8609\n",
"[flaml.automl.logger: 04-12 16:04:17] {2341} INFO - iteration 4, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n",
"[nltk_data] Downloading package punkt to /root/nltk_data...\n",
"[nltk_data] Package punkt is already up-to-date!\n"
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"[nltk_data] Downloading package punkt to /root/nltk_data...\n",
"[nltk_data] Package punkt is already up-to-date!\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 16:07:29] {2526} INFO - at 915.9s,\testimator transformer's best error=0.8605,\tbest estimator transformer's best error=0.8605\n",
"[flaml.automl.logger: 04-12 16:07:29] {2341} INFO - iteration 5, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n",
"[nltk_data] Downloading package punkt to /root/nltk_data...\n",
"[nltk_data] Package punkt is already up-to-date!\n"
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"[nltk_data] Downloading package punkt to /root/nltk_data...\n",
"[nltk_data] Package punkt is already up-to-date!\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 16:10:40] {2526} INFO - at 1106.4s,\testimator transformer's best error=0.8605,\tbest estimator transformer's best error=0.8605\n",
"[flaml.automl.logger: 04-12 16:10:40] {2341} INFO - iteration 6, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n",
"[nltk_data] Downloading package punkt to /root/nltk_data...\n",
"[nltk_data] Package punkt is already up-to-date!\n"
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"[nltk_data] Downloading package punkt to /root/nltk_data...\n",
"[nltk_data] Package punkt is already up-to-date!\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 16:13:50] {2526} INFO - at 1296.4s,\testimator transformer's best error=0.8605,\tbest estimator transformer's best error=0.8605\n",
"[flaml.automl.logger: 04-12 16:13:50] {2341} INFO - iteration 7, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n",
"[nltk_data] Downloading package punkt to /root/nltk_data...\n",
"[nltk_data] Package punkt is already up-to-date!\n"
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"[nltk_data] Downloading package punkt to /root/nltk_data...\n",
"[nltk_data] Package punkt is already up-to-date!\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 16:17:00] {2526} INFO - at 1486.5s,\testimator transformer's best error=0.8599,\tbest estimator transformer's best error=0.8599\n",
"[flaml.automl.logger: 04-12 16:17:00] {2341} INFO - iteration 8, current learner transformer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n",
"[nltk_data] Downloading package punkt to /root/nltk_data...\n",
"[nltk_data] Package punkt is already up-to-date!\n"
"/usr/local/lib/python3.9/dist-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.9/dist-packages/transformers/tokenization_utils_base.py:3586: UserWarning: `as_target_tokenizer` is deprecated and will be removed in v5 of Transformers. You can tokenize your labels by using the argument `text_target` of the regular `__call__` method (either in the same call as your input texts if you use the same keyword arguments, or in a separate call.\n",
" warnings.warn(\n",
"[nltk_data] Downloading package punkt to /root/nltk_data...\n",
"[nltk_data] Package punkt is already up-to-date!\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[flaml.automl.logger: 04-12 16:20:09] {2526} INFO - at 1675.4s,\testimator transformer's best error=0.8599,\tbest estimator transformer's best error=0.8599\n",
"[flaml.automl.logger: 04-12 16:20:09] {2642} INFO - selected model: None\n",
"[flaml.automl.logger: 04-12 16:20:09] {2041} INFO - fit succeeded\n",
"[flaml.automl.logger: 04-12 16:20:09] {2042} INFO - Time taken to find the best model: 1486.497179031372\n",
"[flaml.automl.logger: 04-12 16:20:09] {2054} WARNING - Time taken to find the best model is 83% of the provided time budget and not all estimators' hyperparameter search converged. Consider increasing the time budget.\n"