feat: amazon linux 2 setup script (#350)

Added an Amazon Linux 2 setup script. Also updated the Ubuntu setup script to keep the two scripts as closely aligned as possible.

Co-authored-by: cragwolfe <crag@unstructured.io>
This commit is contained in:
qued 2023-03-09 08:52:24 -06:00 committed by GitHub
parent 6be07a5260
commit e43e9178ae
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 150 additions and 36 deletions

View File

@ -1,4 +1,4 @@
## 0.5.3-dev5
## 0.5.3
### Enhancements
@ -10,6 +10,7 @@
* Add `--wikipedia-auto-suggest` argument to the ingest CLI to disable automatic redirection
to pages with similar names.
* Add setup script for Amazon Linux 2
* Add optional `encoding` argument to the `partition_(text/email/html)` functions.
* Added Google Drive connector for ingest cli.
* Added Gitlab connector for ingest cli.

111
scripts/setup_al2.sh Executable file
View File

@ -0,0 +1,111 @@
#!/bin/bash
# Set up the system-level dependencies of unstructured on Amazon Linux 2.
#
# Usage: setup_al2.sh <user-account>
#   <user-account>  name of the user account for which to set up the
#                   unstructured dependencies.
#
# Run either as root or as a user that is allowed to use sudo.

# Check the argument before enabling `-u`, using a default expansion so a
# missing argument produces the usage text rather than an unbound-variable error.
if [[ -z "${1:-}" ]]; then
  echo "When running this script, please supply the name of the user account for which to set up unstructured dependencies." >&2
  echo "Ex: ${0} abertl" >&2
  exit 1
fi

# -e: stop at the first failing command, -u: unset variables are errors,
# -x: trace every command as it runs.
set -eux

# Set package manager command for this distribution
pac="yum"

# If we're not running as root, we want to prefix certain commands with sudo.
# $sudo is deliberately expanded unquoted further down so that an empty value
# disappears from the command line.
if [[ $(whoami) == 'root' ]]; then
  $pac update -y
  # Even as root, sudo must be present: the script later runs commands as the
  # target user through `sudo -u`.
  $pac install -y sudo
  sudo=''
else
  # Exit from the main shell explicitly instead of from a subshell, so the
  # abort does not depend on `set -e` being active.
  if ! type -p sudo >/dev/null; then
    echo "Please have an administrator install sudo and add you to the sudo group before continuing." >&2
    exit 1
  fi
  sudo='sudo'
fi

# Set user account for which we're configuring the tools
USER_ACCOUNT=$1

# Fail fast on an unknown account; otherwise the mistake would only surface
# at the `sudo -u` step, after the package installs and source builds.
if ! id -u "$USER_ACCOUNT" >/dev/null 2>&1; then
  echo "User account '${USER_ACCOUNT}' does not exist." >&2
  exit 1
fi

# Update existing packages
$sudo $pac update -y
#### Utils
# Prerequisites
$sudo $pac install -y gcc wget tar curl make xz-devel

# Install non-ancient version of sed
# Download over HTTPS so the tarball cannot be altered in transit.
wget https://ftp.gnu.org/gnu/sed/sed-4.9.tar.gz
tar xvf sed-4.9.tar.gz
cd sed-4.9/
# Keep the build steps as separate commands: inside an `a && b && c` list,
# `set -e` ignores a failure of every command except the last one, so a failed
# configure or make would otherwise let the script carry on.
./configure
make
$sudo make install
cd ..

#### Git
# Install git
$sudo $pac install -y git
#### Python
# Install tools needed to build python
$sudo $pac install -y bzip2 sqlite zlib-devel readline-devel sqlite-devel openssl-devel tk-devel libffi-devel bzip2-devel
# Install pyenv
# The heredoc below is passed on stdin to a login shell (`sudo -i`) running as
# $USER_ACCOUNT. Its delimiter is quoted ('EOF'), so this script expands
# nothing inside it: "$HOME" and the other variables are evaluated by that
# user's shell, not by the invoking one.
# Everything inside runs only when ~/.pyenv does not exist yet:
#   1. run the pyenv installer fetched from https://pyenv.run
#   2. make sure ~/.bashrc exists and delete any pyenv initialization lines
#      already present in it, so they are not duplicated
#   3. prepend the four initialization lines (the nested 'EOT' heredoc) to
#      ~/.bashrc, going through a scratch file called "temp" in the home
#      directory
#   4. source ~/.bashrc and install Python 3.8.15 with pyenv
# NOTE(review): the scratch file name "temp" is fixed; an existing file of
# that name in the user's home directory would be overwritten - confirm this
# is acceptable.
# NOTE(review): the inner shell does not enable `set -e`, so presumably a
# failed step (e.g. the installer download) does not stop the following ones -
# verify.
sudo -u "$USER_ACCOUNT" -i <<'EOF'
if [[ ! -d "$HOME"/.pyenv ]]; then
cd $HOME
curl https://pyenv.run | bash
touch "$HOME"/.bashrc
# Remove initialization lines from .bashrc if they are already there, so we don't duplicate them
# shellcheck disable=SC2016
sed -i '/export PYENV_ROOT="$HOME\/.pyenv"/d' "$HOME"/.bashrc
# shellcheck disable=SC2016
sed -i '/command -v pyenv >\/dev\/null || export PATH="$PYENV_ROOT\/bin:$PATH"/d' "$HOME"/.bashrc
# shellcheck disable=SC2016
sed -i '/eval "$(pyenv init -)"/d' "$HOME"/.bashrc
# shellcheck disable=SC2016
sed -i '/eval "$(pyenv virtualenv-init -)"/d' "$HOME"/.bashrc
# Add initialization lines to .bashrc
# shellcheck disable=SC2016
cat <<'EOT' | cat - "$HOME"/.bashrc > temp && mv temp "$HOME"/.bashrc
export PYENV_ROOT="$HOME/.pyenv"
command -v pyenv >/dev/null || export PATH="$PYENV_ROOT/bin:$PATH"
eval "$(pyenv init -)"
eval "$(pyenv virtualenv-init -)"
EOT
# install python
source "$HOME"/.bashrc
pyenv install 3.8.15
fi
EOF
#### OpenCV dependencies
$sudo $pac install -y mesa-libGL

#### Poppler
# Install poppler
$sudo $pac install -y poppler-utils

#### Tesseract
# Install dependencies for image and pdf manipulation
$sudo $pac install -y opencv opencv-devel opencv-python perl-core clang libpng-devel libtiff-devel libwebp-devel libjpeg-turbo-devel git-core libtool pkgconfig xz

# In every source build below, configure / make / make install are separate
# commands on purpose: inside an `a && b && c` list, `set -e` ignores a failure
# of every command except the last one, which would let the script continue
# after a broken build.

# Install leptonica (tesseract dependency)
wget https://github.com/DanBloomberg/leptonica/releases/download/1.75.1/leptonica-1.75.1.tar.gz
tar -xzvf leptonica-1.75.1.tar.gz
cd leptonica-1.75.1
./configure
make
$sudo make install
cd ..

# Install autoconf-archive (tesseract dependency)
# Fetched over HTTPS from the primary GNU server rather than over plain HTTP
# from a third-party mirror.
wget https://ftp.gnu.org/gnu/autoconf-archive/autoconf-archive-2017.09.28.tar.xz
tar -xvf autoconf-archive-2017.09.28.tar.xz
cd autoconf-archive-2017.09.28
./configure
make
$sudo make install
# Copy the macros into /usr/share/aclocal as well.
# NOTE(review): presumably needed so tesseract's autogen.sh finds them there -
# confirm.
$sudo cp m4/* /usr/share/aclocal
cd ..

# Install tesseract
git clone --depth 1 https://github.com/tesseract-ocr/tesseract.git tesseract-ocr
cd tesseract-ocr
# leptonica was installed from source above; point pkg-config at
# /usr/local/lib/pkgconfig for the tesseract configure step.
export PKG_CONFIG_PATH=/usr/local/lib/pkgconfig
./autogen.sh
./configure
make
$sudo make install
cd ..

# Install tesseract languages
# Only the checked-out *.traineddata files are used, so a shallow clone avoids
# downloading the repository's history.
git clone --depth 1 https://github.com/tesseract-ocr/tessdata.git
$sudo cp tessdata/*.traineddata /usr/local/share/tessdata

#### libmagic
$sudo $pac install -y file-devel

View File

@ -1,5 +1,5 @@
#!/bin/bash
set +u -e
set +u
if [ -z "$1" ]; then
echo "When running this script, please supply the name of the user account for which to set up unstructured dependencies."
@ -7,11 +7,7 @@ if [ -z "$1" ]; then
exit 1
fi
set -ux
# Set user account for which we're configuring the tools
USER_ACCOUNT=$1
USER_ACCOUNT_HOME=$(bash -c "cd ~$(printf %q "$USER_ACCOUNT") && pwd")
set -eux
# Set package manager command for this distribution
pac="apt"
@ -25,6 +21,9 @@ if [[ $(whoami) == 'root' ]]; then
sudo='sudo'
fi
# Set user account for which we're configuring the tools
USER_ACCOUNT=$1
# Update existing packages
# Reconfigure the service that detects the need for service restarts from interactive mode (user
# needs to manually confirm which services to restart) to automatic. If we don't do this we'll
@ -36,42 +35,45 @@ if [[ -d /etc/needrestart/conf.d ]]; then
fi
$sudo $pac upgrade -y
#### Utils
# Prerequisites
$sudo env DEBIAN_FRONTEND="noninteractive" $pac install -y gcc wget tar curl make xz-utils build-essential tzdata
#### Git
# Install git
$sudo $pac install -y git
#### Python
# Install tools needed to build python
$sudo env DEBIAN_FRONTEND="noninteractive" $pac install -y curl gcc bzip2 sqlite zlib1g-dev libreadline-dev libsqlite3-dev libssl-dev tk-dev libffi-dev xz-utils make build-essential libbz2-dev wget llvm libncursesw5-dev libxml2-dev libxmlsec1-dev liblzma-dev
$sudo $pac install -y bzip2 sqlite zlib1g-dev libreadline-dev libsqlite3-dev libssl-dev tk-dev libffi-dev libbz2-dev llvm libncursesw5-dev libxml2-dev libxmlsec1-dev liblzma-dev
# Install pyenv
if [[ ! -d $USER_ACCOUNT_HOME/.pyenv ]]; then
sudo -u "$USER_ACCOUNT" -i <<'EOF'
cd $HOME
curl https://pyenv.run | bash
sudo -u "$USER_ACCOUNT" -i <<'EOF'
if [[ ! -d "$HOME"/.pyenv ]]; then
cd $HOME
curl https://pyenv.run | bash
touch "$HOME"/.bashrc
# Remove initialization lines from .bashrc if they are already there, so we don't duplicate them
# shellcheck disable=SC2016
sed -i '/export PYENV_ROOT="$HOME\/.pyenv"/d' "$HOME"/.bashrc
# shellcheck disable=SC2016
sed -i '/command -v pyenv >\/dev\/null || export PATH="$PYENV_ROOT\/bin:$PATH"/d' "$HOME"/.bashrc
# shellcheck disable=SC2016
sed -i '/eval "$(pyenv init -)"/d' "$HOME"/.bashrc
# shellcheck disable=SC2016
sed -i '/eval "$(pyenv virtualenv-init -)"/d' "$HOME"/.bashrc
# Add initialization lines to .bashrc
# shellcheck disable=SC2016
cat <<'EOT' | cat - "$HOME"/.bashrc > temp && mv temp "$HOME"/.bashrc
export PYENV_ROOT="$HOME/.pyenv"
command -v pyenv >/dev/null || export PATH="$PYENV_ROOT/bin:$PATH"
eval "$(pyenv init -)"
eval "$(pyenv virtualenv-init -)"
EOT
# install python
source "$HOME"/.bashrc
pyenv install 3.8.15
fi
EOF
# Remove initialization lines from .bashrc if they are already there, so we don't duplicate them
# shellcheck disable=SC2016
sed -i '/export PYENV_ROOT="$HOME\/.pyenv"/d' "$USER_ACCOUNT_HOME"/.bashrc
# shellcheck disable=SC2016
sed -i '/command -v pyenv >\/dev\/null || export PATH="$PYENV_ROOT\/bin:$PATH"/d' "$USER_ACCOUNT_HOME"/.bashrc
# shellcheck disable=SC2016
sed -i '/eval "$(pyenv init -)"/d' "$USER_ACCOUNT_HOME"/.bashrc
# shellcheck disable=SC2016
sed -i '/eval "$(pyenv virtualenv-init -)"/d' "$USER_ACCOUNT_HOME"/.bashrc
# Add initialization lines to .bashrc
# shellcheck disable=SC2016
sed -i '1ieval "$(pyenv virtualenv-init -)"' "$USER_ACCOUNT_HOME"/.bashrc
# shellcheck disable=SC2016
sed -i '1ieval "$(pyenv init -)"' "$USER_ACCOUNT_HOME"/.bashrc
# shellcheck disable=SC2016
sed -i '1icommand -v pyenv >/dev/null || export PATH="$PYENV_ROOT/bin:$PATH"' "$USER_ACCOUNT_HOME"/.bashrc
# shellcheck disable=SC2016
sed -i '1iexport PYENV_ROOT="$HOME/.pyenv"' "$USER_ACCOUNT_HOME"/.bashrc
# install python
sudo -u "$USER_ACCOUNT" -i <<'EOF'
pyenv install 3.8.15
EOF
fi
#### OpenCV dependencies
$sudo $pac install -y libgl1

View File

@ -1 +1 @@
__version__ = "0.5.3-dev5" # pragma: no cover
__version__ = "0.5.3" # pragma: no cover