mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-12-25 06:04:53 +00:00
feat: add ubuntu setup script (#279)
This commit is contained in:
parent
9062d25d0d
commit
a79b365ab4
@ -1,9 +1,13 @@
|
||||
## 0.4.16-dev0
|
||||
## 0.4.16-dev1
|
||||
|
||||
### Enhancements
|
||||
|
||||
* Fallback to using file extensions for filetype detection if `libmagic` is not present
|
||||
|
||||
### Features
|
||||
|
||||
* Added setup script for Ubuntu
|
||||
|
||||
## 0.4.15
|
||||
|
||||
### Enhancements
|
||||
|
||||
4
Makefile
4
Makefile
@ -130,7 +130,7 @@ check-scripts:
|
||||
.PHONY: check-version
|
||||
check-version:
|
||||
# Fail if syncing version would produce changes
|
||||
scripts/version-sync.sh -c
|
||||
scripts/version-sync.sh -c -f "unstructured/__version__.py" semver
|
||||
|
||||
## tidy: run black
|
||||
.PHONY: tidy
|
||||
@ -141,7 +141,7 @@ tidy:
|
||||
## version-sync: update __version__.py with most recent version from CHANGELOG.md
|
||||
.PHONY: version-sync
|
||||
version-sync:
|
||||
scripts/version-sync.sh
|
||||
scripts/version-sync.sh -f "unstructured/__version__.py" semver
|
||||
|
||||
.PHONY: check-coverage
|
||||
check-coverage:
|
||||
|
||||
86
scripts/setup_ubuntu.sh
Executable file
86
scripts/setup_ubuntu.sh
Executable file
@ -0,0 +1,86 @@
|
||||
#!/bin/bash
#
# Install the system-level dependencies of unstructured on Ubuntu for one user
# account: git, the packages needed to build Python, pyenv with Python 3.8.15,
# libgl1 (OpenCV), poppler-utils, and tesseract with Russian language data.
#
# Usage: setup_ubuntu.sh USER_ACCOUNT
#   USER_ACCOUNT  name of the account whose ~/.pyenv and ~/.bashrc are set up
#
# Run as root, or as a user that is allowed to use sudo.

# -u stays off while $1 is inspected so that a missing argument yields the
# help text below rather than an "unbound variable" abort.
set +u -e

if [ -z "$1" ]; then
  echo "When running this script, please supply the name of the user account for which to set up unstructured dependencies."
  echo "Ex: ${0} abertl"
  exit 1
fi

set -ux

# Set user account for which we're configuring the tools
USER_ACCOUNT=$1

# Set package manager command for this distribution
pac="apt"

# If we're not running as root, we want to prefix certain commands with sudo.
# $sudo is expanded unquoted on purpose: when empty it must vanish entirely.
if [[ $(whoami) == 'root' ]]; then
  sudo=''
else
  sudo='sudo'
fi

# Temporary needrestart override written below.
needrestart_override=/etc/needrestart/conf.d/99z_temp_disable.conf

# Put needrestart back the way it was. Registered as an EXIT trap so the
# override is removed on every exit path, including an abort caused by
# `set -e` part-way through the installs.
restore_needrestart() {
  if [[ -d /etc/needrestart/conf.d/ ]]; then
    $sudo rm -f "$needrestart_override"
  fi
}
trap restore_needrestart EXIT

# Update existing packages
# Reconfigure the service that detects the need for service restarts from interactive mode (user
# needs to manually confirm which services to restart) to automatic. If we don't do this we'll
# get hung up on a screen asking us which services we want to restart after upgrading packages.
$sudo $pac update -y
if [[ -d /etc/needrestart/conf.d ]]; then
  # shellcheck disable=SC2016
  echo '$nrconf{restart} = '"'a';" | $sudo tee "$needrestart_override"
fi
$sudo $pac upgrade -y

#### Git
# Install git
$sudo $pac install -y git

#### Python
# Install tools needed to build python
$sudo $pac install -y curl gcc bzip2 sqlite zlib1g-dev libreadline-dev libsqlite3-dev libssl-dev tk-dev libffi-dev xz-utils make build-essential libbz2-dev wget llvm libncursesw5-dev libxml2-dev libxmlsec1-dev liblzma-dev

# Install pyenv (skipped entirely when the account already has ~/.pyenv)
if [[ ! -d /home/$USER_ACCOUNT/.pyenv ]]; then
  # The heredoc delimiter is quoted, so $HOME is expanded by the target
  # user's login shell, not by this script.
  sudo -u "$USER_ACCOUNT" -i <<'EOF'
cd $HOME
curl https://pyenv.run | bash
EOF

  # NOTE(review): the sed calls below assume this file already exists and is
  # non-empty (a `1i` insert needs a first line) — confirm for fresh accounts.
  bashrc=/home/"$USER_ACCOUNT"/.bashrc

  # Remove initialization lines from .bashrc if they are already there, so we don't duplicate them
  # shellcheck disable=SC2016
  sed -i '/export PYENV_ROOT="$HOME\/.pyenv"/d' "$bashrc"
  # shellcheck disable=SC2016
  sed -i '/command -v pyenv >\/dev\/null || export PATH="$PYENV_ROOT\/bin:$PATH"/d' "$bashrc"
  # shellcheck disable=SC2016
  sed -i '/eval "$(pyenv init -)"/d' "$bashrc"
  # shellcheck disable=SC2016
  sed -i '/eval "$(pyenv virtualenv-init -)"/d' "$bashrc"

  # Add initialization lines to .bashrc. Each command inserts before line 1,
  # so they are issued in reverse to end up in the intended top-down order.
  # Presumably they go at the very top so they run before any early return
  # for non-interactive shells — confirm.
  # shellcheck disable=SC2016
  sed -i '1ieval "$(pyenv virtualenv-init -)"' "$bashrc"
  # shellcheck disable=SC2016
  sed -i '1ieval "$(pyenv init -)"' "$bashrc"
  # shellcheck disable=SC2016
  sed -i '1icommand -v pyenv >/dev/null || export PATH="$PYENV_ROOT/bin:$PATH"' "$bashrc"
  # shellcheck disable=SC2016
  sed -i '1iexport PYENV_ROOT="$HOME/.pyenv"' "$bashrc"

  # install python
  sudo -u "$USER_ACCOUNT" -i <<'EOF'
pyenv install 3.8.15
EOF
fi

#### OpenCV dependencies
$sudo $pac install -y libgl1

#### Poppler
# Install poppler
$sudo $pac install -y poppler-utils

#### Tesseract
# Install tesseract as well as Russian language
$sudo $pac install -y tesseract-ocr libtesseract-dev tesseract-ocr-rus

#### Clean up
# The needrestart override is removed by the restore_needrestart EXIT trap.
|
||||
@ -1,41 +1,110 @@
|
||||
#!/bin/bash
|
||||
#######################################
# Print the command-line help for this script.
# Globals:   none
# Arguments: none
# Outputs:   ten lines of help text on stdout (nothing on stderr)
#######################################
function usage {
  # The first line names the script as invoked, without its directory part.
  # All text goes to stdout; callers that want it elsewhere redirect the call.
  printf '%s\n' \
    "Usage: $(basename "$0") [-c] -f FILE_TO_CHANGE REPLACEMENT_FORMAT [-f FILE_TO_CHANGE REPLACEMENT_FORMAT ...]" \
    'Synchronize files to latest version in source file' \
    ' -s Specifies source file for version (default is CHANGELOG.md)' \
    ' -f Specifies a file to change and the format for searching and replacing versions' \
    ' FILE_TO_CHANGE is the file to be updated/checked for updates' \
    ' REPLACEMENT_FORMAT is one of (semver, release, api-release)' \
    ' semver indicates to look for a full semver version and replace with the latest full version' \
    ' release indicates to look for a release semver version (x.x.x) and replace with the latest release version' \
    ' api-release indicates to look for a release semver version in the context of an api route and replace with the latest release version' \
    ' -c Compare versions and output proposed changes without changing anything.'
}
|
||||
|
||||
CHECK=0
|
||||
while getopts ":c" opt; do
|
||||
#######################################
# Gather the extra non-option words that follow the option getopts has just
# parsed, so that one option can take several arguments.
# Call it from inside a getopts loop as: getopts-extra "$@"
# Globals:
#   OPTIND  read; advanced past every word that is consumed
#   OPTARG  becomes an array: index 0 keeps the value getopts stored,
#           consumed words are stored at indices 1, 2, 3, ...
# Arguments:
#   the same positional parameters the getopts loop is parsing
# Returns:
#   0
#######################################
function getopts-extra () {
  declare next_slot=1
  # if the next argument is not an option, then append it to array OPTARG
  while [[ ${OPTIND} -le $# && ${!OPTIND:0:1} != '-' ]]; do
    OPTARG[next_slot]=${!OPTIND}
    # Explicit arithmetic: `+=` on a variable without the integer attribute
    # concatenates strings (1 -> 11 -> 111), which scattered the extra words
    # over sparse indices instead of 1, 2, 3.
    next_slot=$(( next_slot + 1 ))
    OPTIND=$(( OPTIND + 1 ))
  done
}
|
||||
|
||||
# Parse input options
|
||||
declare CHECK=0
|
||||
declare SOURCE_FILE="CHANGELOG.md"
|
||||
declare -a FILES_TO_CHECK=()
|
||||
declare -a REPLACEMENT_FORMATS=()
|
||||
declare args
|
||||
declare OPTIND OPTARG opt
|
||||
while getopts ":hcs:f:" opt; do
|
||||
case $opt in
|
||||
h)
|
||||
usage
|
||||
exit 0
|
||||
;;
|
||||
c)
|
||||
CHECK=1
|
||||
;;
|
||||
s)
|
||||
SOURCE_FILE="$OPTARG"
|
||||
;;
|
||||
f)
|
||||
getopts-extra "$@"
|
||||
args=( "${OPTARG[@]}" )
|
||||
# validate length of args, should be 2
|
||||
if [ ${#args[@]} -eq 2 ]; then
|
||||
FILES_TO_CHECK+=( "${args[0]}" )
|
||||
REPLACEMENT_FORMATS+=( "${args[1]}" )
|
||||
else
|
||||
echo "Exactly 2 arguments must follow -f option." >&2
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
\?)
|
||||
echo "Invalid option: -$OPTARG. Use -c to show changes without applying, use no options to apply changes." >&2
|
||||
echo "Invalid option: -$OPTARG." >&2
|
||||
usage
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Version appearing earliest in CHANGELOGFILE will be used as ground truth.
|
||||
CHANGELOGFILE="CHANGELOG.md"
|
||||
VERSIONFILE="unstructured/__version__.py"
|
||||
RE_SEMVER_FULL="(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(-((0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(\.(0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(\+([0-9a-zA-Z-]+(\.[0-9a-zA-Z-]+)*))?"
|
||||
# Pull out semver appearing earliest in CHANGELOGFILE.
|
||||
LAST_VERSION=$(grep -o -m 1 -E "${RE_SEMVER_FULL}" "$CHANGELOGFILE")
|
||||
# Parse REPLACEMENT_FORMATS
|
||||
RE_SEMVER_FULL='(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)(-((0|[1-9][0-9]*|[0-9]*[a-zA-Z-][0-9a-zA-Z-]*)(\.(0|[1-9][0-9]*|[0-9]*[a-zA-Z-][0-9a-zA-Z-]*))*))?(\+([0-9a-zA-Z-]+(\.[0-9a-zA-Z-]+)*))?'
|
||||
RE_RELEASE="(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)"
|
||||
RE_API_RELEASE="v(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)"
|
||||
# Pull out semver appearing earliest in SOURCE_FILE.
|
||||
LAST_VERSION=$(grep -o -m 1 -E "${RE_SEMVER_FULL}" "$SOURCE_FILE")
|
||||
LAST_RELEASE=$(grep -o -m 1 -E "${RE_RELEASE}($|[^-+])$" "$SOURCE_FILE" | grep -o -m 1 -E "${RE_RELEASE}")
|
||||
LAST_API_RELEASE="v$(grep -o -m 1 -E "${RE_RELEASE}($|[^-+])$" "$SOURCE_FILE" | grep -o -m 1 -E "${RE_RELEASE}")"
|
||||
declare -a RE_SEMVERS=()
|
||||
declare -a UPDATED_VERSIONS=()
|
||||
for i in "${!REPLACEMENT_FORMATS[@]}"; do
|
||||
REPLACEMENT_FORMAT=${REPLACEMENT_FORMATS[$i]}
|
||||
case $REPLACEMENT_FORMAT in
|
||||
semver)
|
||||
RE_SEMVERS+=( "$RE_SEMVER_FULL" )
|
||||
UPDATED_VERSIONS+=( "$LAST_VERSION" )
|
||||
;;
|
||||
release)
|
||||
RE_SEMVERS+=( "$RE_RELEASE" )
|
||||
UPDATED_VERSIONS+=( "$LAST_RELEASE" )
|
||||
;;
|
||||
api-release)
|
||||
RE_SEMVERS+=( "$RE_API_RELEASE" )
|
||||
UPDATED_VERSIONS+=( "$LAST_API_RELEASE" )
|
||||
;;
|
||||
*)
|
||||
echo "Invalid replacement format: \"${REPLACEMENT_FORMAT}\". Use semver, release, or api-release" >&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [ -z "$LAST_VERSION" ];
|
||||
then
|
||||
# No match to semver regex in CHANGELOGFILE, so no version to go from.
|
||||
printf "Error: Unable to find latest version from %s.\n" "$CHANGELOGFILE"
|
||||
# No match to semver regex in SOURCE_FILE, so no version to go from.
|
||||
printf "Error: Unable to find latest version from %s.\n" "$SOURCE_FILE"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Add files to this array that need to be kept in sync.
|
||||
FILES_TO_CHANGE=("$VERSIONFILE")
|
||||
# Add patterns to this array to be matched in the above files.
|
||||
RE_SEMVERS=("$RE_SEMVER_FULL")
|
||||
# Add versions to this array to be used as replacements for the patterns matched above from the corresponding files.
|
||||
UPDATED_VERSIONS=("$LAST_VERSION")
|
||||
# Search files in FILES_TO_CHECK and change (or get diffs)
|
||||
declare FAILED_CHECK=0
|
||||
|
||||
for i in "${!FILES_TO_CHANGE[@]}"; do
|
||||
FILE_TO_CHANGE=${FILES_TO_CHANGE[$i]}
|
||||
for i in "${!FILES_TO_CHECK[@]}"; do
|
||||
FILE_TO_CHANGE=${FILES_TO_CHECK[$i]}
|
||||
RE_SEMVER=${RE_SEMVERS[$i]}
|
||||
UPDATED_VERSION=${UPDATED_VERSIONS[$i]}
|
||||
FILE_VERSION=$(grep -o -m 1 -E "${RE_SEMVER}" "$FILE_TO_CHANGE")
|
||||
@ -45,7 +114,7 @@ for i in "${!FILES_TO_CHANGE[@]}"; do
|
||||
printf "Error: No semver version found in file %s.\n" "$FILE_TO_CHANGE"
|
||||
exit 1
|
||||
else
|
||||
# Replace semver in VERSIONFILE with semver obtained from CHANGELOGFILE
|
||||
# Replace semver in VERSIONFILE with semver obtained from SOURCE_FILE
|
||||
TMPFILE=$(mktemp /tmp/new_version.XXXXXX)
|
||||
# Check sed version, exit if version < 4.3
|
||||
if ! sed --version > /dev/null 2>&1; then
|
||||
@ -63,13 +132,12 @@ for i in "${!FILES_TO_CHANGE[@]}"; do
|
||||
DIFF=$(diff "$FILE_TO_CHANGE" "$TMPFILE" )
|
||||
if [ -z "$DIFF" ];
|
||||
then
|
||||
printf "version sync would make no changes.\n"
|
||||
printf "version sync would make no changes to %s.\n" "$FILE_TO_CHANGE"
|
||||
rm "$TMPFILE"
|
||||
exit 0
|
||||
else
|
||||
printf "version sync would make the following changes:\n%s\n" "$DIFF"
|
||||
FAILED_CHECK=1
|
||||
printf "version sync would make the following changes to %s:\n%s\n" "$FILE_TO_CHANGE" "$DIFF"
|
||||
rm "$TMPFILE"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
cp "$TMPFILE" "$FILE_TO_CHANGE"
|
||||
@ -77,3 +145,10 @@ for i in "${!FILES_TO_CHANGE[@]}"; do
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
# Exit with code determined by whether changes were needed in a check.
|
||||
if [ ${FAILED_CHECK} -ne 0 ]; then
|
||||
exit 1
|
||||
else
|
||||
exit 0
|
||||
fi
|
||||
|
||||
@ -1 +1 @@
|
||||
__version__ = "0.4.16-dev0" # pragma: no cover
|
||||
__version__ = "0.4.16-dev1" # pragma: no cover
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user