Cleaned up Dockerfile for agnext (#173)

Co-authored-by: gagb <gagb@users.noreply.github.com>
This commit is contained in:
afourney 2024-07-03 00:10:24 -07:00 committed by GitHub
parent 99ecb5ec7f
commit 2e7c7e2d82

View File

@@ -1,7 +1,8 @@
FROM python:3.11 FROM python:3.11
MAINTAINER AutoGen MAINTAINER AGNext
# Install packages # Install packages
# ffmpeg and exiftool are needed for mdconvert
RUN apt-get update && apt-get install ffmpeg exiftool -y RUN apt-get update && apt-get install ffmpeg exiftool -y
# Set the image to the Pacific Timezone # Set the image to the Pacific Timezone
@@ -10,36 +11,27 @@ RUN ln -snf /usr/share/zoneinfo/US/Pacific /etc/localtime && echo "US/Pacific" >
# Upgrade pip # Upgrade pip
RUN pip install --upgrade pip RUN pip install --upgrade pip
# Pre-load autogen dependencies, but not autogen itself since we'll often want to install the latest from source # Pre-load agnext dependencies, but not agnext itself since we'll often want to install the latest from source
RUN pip install pyautogen[teachable,lmm,graphs,websurfer] RUN pip install openai pillow aiohttp typing-extensions pydantic types-aiofiles grpcio protobuf
RUN pip uninstall --yes pyautogen
# Pre-load popular packages as per https://learnpython.com/blog/most-popular-python-packages/ # Pre-load popular packages as per https://learnpython.com/blog/most-popular-python-packages/
RUN pip install numpy pandas matplotlib seaborn scikit-learn requests urllib3 nltk pillow pytest RUN pip install numpy pandas matplotlib seaborn scikit-learn requests urllib3 nltk pytest
# Pre-load packages needed for complex_task file utils # Pre-load packages needed for mdconvert file utils
RUN pip install python-docx pdfminer.six requests pillow easyocr python-pptx SpeechRecognition pandas openpyxl pydub mammoth puremagic youtube_transcript_api==0.6.0 RUN pip install python-docx pdfminer.six python-pptx SpeechRecognition openpyxl pydub mammoth puremagic youtube_transcript_api==0.6.0
# easyocr
# Pre-load Selenium and Playwright # Pre-load Playwright
RUN pip install selenium playwright RUN pip install playwright
# Chromium playwright
RUN playwright install --with-deps chromium RUN playwright install --with-deps chromium
RUN playwright install --with-deps firefox
# Chrome for Selenium (need to run dpkg twice to resolve dependencies)
# RUN wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
# RUN dpkg -i google-chrome-stable_current_amd64.deb || :
# RUN apt -f install -y
# RUN dpkg -i google-chrome-stable_current_amd64.deb
# Fix an incompatibility with numpy # Fix an incompatibility with numpy
RUN pip uninstall --yes numpy RUN pip uninstall --yes numpy
RUN pip install "numpy<2.0" RUN pip install "numpy<2.0"
# Pre-load the OCR model # Pre-load the OCR model
RUN /usr/bin/echo -e "import easyocr\nreader = easyocr.Reader(['en'])" | python #RUN /usr/bin/echo -e "import easyocr\nreader = easyocr.Reader(['en'])" | python
# Webarena # Webarena (evaluation code)
RUN pip install beartype aiolimiter RUN pip install beartype aiolimiter
RUN /usr/bin/echo -e "import nltk\nnltk.download('punkt')" | python RUN /usr/bin/echo -e "import nltk\nnltk.download('punkt')" | python