2022-06-29 14:35:19 -04:00
|
|
|
# Python package name; the test directory is derived from it as test_<name>.
PACKAGE_NAME := unstructured
# pip release pinned by the install targets and forwarded to the Docker build script.
PIP_VERSION  := 23.1.2
# Working directory when the Makefile is parsed; used for the Docker bind mounts.
CURRENT_DIR  := $(shell pwd)
# Machine hardware name as printed by `uname -m`.
ARCH         := $(shell uname -m)
|
2022-06-29 14:35:19 -04:00
|
|
|
|
|
|
|
# Print the help text: each line of this Makefile that begins with two hashes,
# a space and a letter, with that "hash hash space" prefix removed.
.PHONY: help
help: Makefile
	@sed -n 's/^## \([a-zA-Z]\)/\1/p' $<
|
|
|
|
|
|
|
|
|
|
|
|
###########
|
|
|
|
# Install #
|
|
|
|
###########
|
|
|
|
|
|
|
|
## install-base: installs core requirements needed for text processing bricks
|
|
|
|
# Aggregate target with no recipe of its own: base pip packages plus the NLTK models.
.PHONY: install-base
install-base: \
    install-base-pip-packages \
    install-nltk-models
|
|
|
|
|
|
|
|
## install: installs all test, dev, and experimental requirements
|
|
|
|
# Aggregate target with no recipe of its own; prerequisites are listed in the
# order make considers them in a serial build.
.PHONY: install
install: \
    install-base-pip-packages \
    install-dev \
    install-nltk-models \
    install-test \
    install-huggingface \
    install-unstructured-inference
|
2022-06-29 14:35:19 -04:00
|
|
|
|
|
|
|
# Aggregate target for CI: the same prerequisites as `install` except install-dev.
.PHONY: install-ci
install-ci: \
    install-base-pip-packages \
    install-nltk-models \
    install-huggingface \
    install-unstructured-inference \
    install-test
|
2022-06-29 14:35:19 -04:00
|
|
|
|
|
|
|
# Pin pip to PIP_VERSION first, then install the base requirements with it.
.PHONY: install-base-pip-packages
install-base-pip-packages:
	python3 -m pip install pip==$(PIP_VERSION)
	python3 -m pip install -r requirements/base.txt
|
2022-06-29 14:35:19 -04:00
|
|
|
|
2022-10-13 11:18:27 -04:00
|
|
|
# Pin pip to PIP_VERSION, then install the huggingface requirements.
.PHONY: install-huggingface
install-huggingface:
	python3 -m pip install pip==$(PIP_VERSION)
	python3 -m pip install -r requirements/huggingface.txt
|
2022-10-13 11:18:27 -04:00
|
|
|
|
2022-06-29 14:35:19 -04:00
|
|
|
# Download the NLTK data packages 'punkt' and 'averaged_perceptron_tagger'.
# Declared with .PHONY (the previous ".PHONE" spelling was an ordinary, unused
# target, so this rule was never actually marked phony). Uses python3 like the
# rest of the install targets so the same interpreter receives the downloads.
.PHONY: install-nltk-models
install-nltk-models:
	python3 -c "import nltk; nltk.download('punkt')"
	python3 -c "import nltk; nltk.download('averaged_perceptron_tagger')"
|
|
|
|
|
|
|
|
# Install the pinned test requirements.
.PHONY: install-test
install-test:
	python3 -m pip install -r requirements/test.txt
|
2022-06-29 14:35:19 -04:00
|
|
|
|
|
|
|
# Install the pinned development requirements.
.PHONY: install-dev
install-dev:
	python3 -m pip install -r requirements/dev.txt
|
2022-06-29 14:35:19 -04:00
|
|
|
|
|
|
|
# Install the pinned build requirements.
.PHONY: install-build
install-build:
	python3 -m pip install -r requirements/build.txt
|
2022-06-29 14:35:19 -04:00
|
|
|
|
2023-03-07 06:01:02 +00:00
|
|
|
# Install the pinned requirements for the Google Drive ingest connector.
.PHONY: install-ingest-google-drive
install-ingest-google-drive:
	python3 -m pip install -r requirements/ingest-google-drive.txt
|
2023-03-07 06:01:02 +00:00
|
|
|
|
2023-02-14 12:27:45 -08:00
|
|
|
## install-ingest-s3: install requirements for the s3 connector
|
|
|
|
# Install the pinned requirements listed in requirements/ingest-s3.txt.
.PHONY: install-ingest-s3
install-ingest-s3:
	python3 -m pip install -r requirements/ingest-s3.txt
|
2023-02-14 12:27:45 -08:00
|
|
|
|
2023-03-11 00:43:40 +01:00
|
|
|
# Install the pinned requirements listed in requirements/ingest-azure.txt.
.PHONY: install-ingest-azure
install-ingest-azure:
	python3 -m pip install -r requirements/ingest-azure.txt
|
2023-03-11 00:43:40 +01:00
|
|
|
|
2023-05-16 11:46:30 -07:00
|
|
|
# Install the pinned requirements listed in requirements/ingest-discord.txt.
# Invokes pip as `python3 -m pip`, matching the other install targets, so the
# packages land in the same interpreter whose pip version is pinned above
# (a bare `pip` on PATH may belong to a different Python).
.PHONY: install-ingest-discord
install-ingest-discord:
	python3 -m pip install -r requirements/ingest-discord.txt
|
|
|
|
|
2023-02-27 23:36:44 +01:00
|
|
|
# Install the pinned requirements listed in requirements/ingest-github.txt.
.PHONY: install-ingest-github
install-ingest-github:
	python3 -m pip install -r requirements/ingest-github.txt
|
2023-02-27 23:36:44 +01:00
|
|
|
|
2023-03-08 09:15:21 +01:00
|
|
|
# Install the pinned requirements listed in requirements/ingest-gitlab.txt.
.PHONY: install-ingest-gitlab
install-ingest-gitlab:
	python3 -m pip install -r requirements/ingest-gitlab.txt
|
2023-03-08 09:15:21 +01:00
|
|
|
|
2023-02-27 09:11:04 +01:00
|
|
|
# Install the pinned requirements listed in requirements/ingest-reddit.txt.
.PHONY: install-ingest-reddit
install-ingest-reddit:
	python3 -m pip install -r requirements/ingest-reddit.txt
|
2023-02-27 09:11:04 +01:00
|
|
|
|
2023-04-16 12:34:43 -07:00
|
|
|
# Install the pinned requirements listed in requirements/ingest-slack.txt.
# Invokes pip as `python3 -m pip`, matching the other install targets, so the
# packages land in the same interpreter whose pip version is pinned above
# (a bare `pip` on PATH may belong to a different Python).
.PHONY: install-ingest-slack
install-ingest-slack:
	python3 -m pip install -r requirements/ingest-slack.txt
|
|
|
|
|
2023-02-28 09:25:11 +01:00
|
|
|
# Install the pinned requirements listed in requirements/ingest-wikipedia.txt.
.PHONY: install-ingest-wikipedia
install-ingest-wikipedia:
	python3 -m pip install -r requirements/ingest-wikipedia.txt
|
2023-02-28 09:25:11 +01:00
|
|
|
|
2023-01-04 16:19:05 -06:00
|
|
|
# Install the pinned requirements listed in requirements/local-inference.txt.
.PHONY: install-unstructured-inference
install-unstructured-inference:
	python3 -m pip install -r requirements/local-inference.txt
|
2023-01-04 16:19:05 -06:00
|
|
|
|
2023-05-05 17:16:28 -07:00
|
|
|
# Install tensorboard>=2.12.2, but only when ARCH is arm64 or aarch64; on any
# other architecture the recipe is a no-op.
# The requirement specifier is quoted: unquoted, the shell parses ">=2.12.2"
# as a redirection of stdout to a file named "=2.12.2" and pip receives a bare
# "tensorboard" with no version bound. ARCH is quoted so the `[` tests stay
# well-formed even if it expands to nothing.
.PHONY: install-tensorboard
install-tensorboard:
	@if [ "$(ARCH)" = "arm64" ] || [ "$(ARCH)" = "aarch64" ]; then \
		python3 -m pip install "tensorboard>=2.12.2"; \
	fi
|
|
|
|
|
2023-01-04 16:19:05 -06:00
|
|
|
# Install detectron2 from its GitHub repository at commit e2ce8dc.
# install-tensorboard is a prerequisite, so it is handled before this recipe runs.
.PHONY: install-detectron2
install-detectron2: install-tensorboard
	python3 -m pip install "detectron2@git+https://github.com/facebookresearch/detectron2.git@e2ce8dc#egg=detectron2"
|
2023-01-04 16:19:05 -06:00
|
|
|
|
|
|
|
## install-local-inference: installs requirements for local inference
|
|
|
|
# Aggregate target with no recipe of its own. `install` already lists
# install-unstructured-inference as a prerequisite; naming it again here is
# harmless because make builds each target at most once per invocation.
.PHONY: install-local-inference
install-local-inference: \
    install \
    install-unstructured-inference \
    install-detectron2
|
|
|
|
|
2023-05-26 15:38:48 -04:00
|
|
|
# Run scripts/install-pandoc.sh with ARCH exported into its environment.
.PHONY: install-pandoc
install-pandoc:
	ARCH=$(ARCH) ./scripts/install-pandoc.sh
|
|
|
|
|
|
|
|
|
2022-06-29 14:35:19 -04:00
|
|
|
## pip-compile: compiles all base/dev/test requirements
|
|
|
|
# Run `pip-compile --upgrade` over each requirements/*.in file in turn and copy
# the build pins to docs/requirements.txt. The indented comments below are part
# of the recipe, so make echoes them as the recipe runs.
.PHONY: pip-compile
pip-compile:
	pip-compile --upgrade requirements/base.in
	# Extra requirements for huggingface staging functions
	pip-compile --upgrade requirements/huggingface.in
	# NOTE(robinson) - We want the dependencies for detectron2 in the requirements.txt, but not
	# the detectron2 repo itself. If detectron2 is in the requirements.txt file, an order of
	# operations issue related to the torch library causes the install to fail
	pip-compile --upgrade requirements/test.in
	pip-compile --upgrade requirements/dev.in
	pip-compile --upgrade requirements/build.in
	pip-compile --upgrade requirements/local-inference.in
	# NOTE(robinson) - docs/requirements.txt is where the GitHub action for building
	# sphinx docs looks for additional requirements
	cp requirements/build.txt docs/requirements.txt
	pip-compile --upgrade requirements/ingest-s3.in
	pip-compile --upgrade requirements/ingest-azure.in
	pip-compile --upgrade requirements/ingest-discord.in
	pip-compile --upgrade requirements/ingest-reddit.in
	pip-compile --upgrade requirements/ingest-github.in
	pip-compile --upgrade requirements/ingest-gitlab.in
	pip-compile --upgrade requirements/ingest-slack.in
	pip-compile --upgrade requirements/ingest-wikipedia.in
	pip-compile --upgrade requirements/ingest-google-drive.in
|
2022-06-29 14:35:19 -04:00
|
|
|
|
|
|
|
## install-project-local: install unstructured into your local python environment
|
|
|
|
# After everything in `install`, install this checkout in editable mode.
# Uses `python3 -m pip` like the other install targets so the editable install
# goes into the same interpreter as the requirements (a bare `pip` on PATH may
# belong to a different Python).
.PHONY: install-project-local
install-project-local: install
	# MAYBE TODO: fail if already exists?
	python3 -m pip install -e .
|
|
|
|
|
|
|
|
## uninstall-project-local: uninstall unstructured from your local python environment
|
|
|
|
# Uninstall the package named by PACKAGE_NAME.
# Uses `python3 -m pip` like the install targets so the removal applies to the
# same interpreter the package was installed into. No -y flag is passed, so
# pip's interactive confirmation behaviour is unchanged.
.PHONY: uninstall-project-local
uninstall-project-local:
	python3 -m pip uninstall $(PACKAGE_NAME)
|
|
|
|
|
|
|
|
#################
|
|
|
|
# Test and Lint #
|
|
|
|
#################
|
|
|
|
|
|
|
|
## test: runs all unittests
|
|
|
|
# Run pytest over test_<PACKAGE_NAME> with the repository root on PYTHONPATH,
# measuring coverage of PACKAGE_NAME and reporting missing lines in the terminal.
.PHONY: test
test:
	PYTHONPATH=. pytest test_$(PACKAGE_NAME) --cov=$(PACKAGE_NAME) --cov-report term-missing
|
|
|
|
|
|
|
|
## check: runs linters (includes tests)
|
|
|
|
# Aggregate target with no recipe of its own: source checks, test-directory
# checks, and the version check.
.PHONY: check
check: \
    check-src \
    check-tests \
    check-version
|
2022-06-29 14:35:19 -04:00
|
|
|
|
|
|
|
## check-src: runs linters (source only, no tests)
|
|
|
|
# Run ruff (selected rule sets) over the repository, then black in check-only
# mode, flake8, and mypy over the package directory. The recipe stops at the
# first command that exits non-zero.
.PHONY: check-src
check-src:
	ruff . --select I,UP015,UP032,UP034,UP018,COM,C4,PT,SIM,PLR0402 --ignore PT011,PT012,SIM117
	black --line-length 100 $(PACKAGE_NAME) --check
	flake8 $(PACKAGE_NAME)
	mypy $(PACKAGE_NAME) --ignore-missing-imports --check-untyped-defs
|
2022-06-29 14:35:19 -04:00
|
|
|
|
|
|
|
# Run black in check-only mode and flake8 over the test_<PACKAGE_NAME> directory.
.PHONY: check-tests
check-tests:
	black --line-length 100 test_$(PACKAGE_NAME) --check
	flake8 test_$(PACKAGE_NAME)
|
|
|
|
|
2022-09-29 15:24:28 -04:00
|
|
|
## check-scripts: run shellcheck
|
|
|
|
# Delegate to scripts/shellcheck.sh; the target fails when that script exits non-zero.
.PHONY: check-scripts
check-scripts:
	# Fail if any of these files have warnings
	scripts/shellcheck.sh
|
|
|
|
|
2022-10-10 13:11:48 -05:00
|
|
|
## check-version: run check to ensure version in CHANGELOG.md matches version in package
|
|
|
|
# Run scripts/version-sync.sh with -c against unstructured/__version__.py
# using the "semver" argument.
.PHONY: check-version
check-version:
	# Fail if syncing version would produce changes
	scripts/version-sync.sh -c \
		-f "unstructured/__version__.py" semver
|
2022-10-10 13:11:48 -05:00
|
|
|
|
2022-06-29 14:35:19 -04:00
|
|
|
## tidy: run ruff autofixes and black
|
|
|
|
# Apply ruff's automatic fixes for the selected rule sets, then reformat the
# package and test directories with black. The `|| true` keeps the recipe
# going even when ruff exits non-zero.
.PHONY: tidy
tidy:
	ruff . --select I,UP015,UP032,UP034,UP018,COM,C4,PT,SIM,PLR0402 --fix-only || true
	black --line-length 100 $(PACKAGE_NAME)
	black --line-length 100 test_$(PACKAGE_NAME)
|
|
|
|
|
2022-10-10 13:11:48 -05:00
|
|
|
## version-sync: update __version__.py with most recent version from CHANGELOG.md
|
|
|
|
# Run scripts/version-sync.sh (without -c) against unstructured/__version__.py
# using the "semver" argument.
.PHONY: version-sync
version-sync:
	scripts/version-sync.sh \
		-f "unstructured/__version__.py" semver
|
2022-10-10 13:11:48 -05:00
|
|
|
|
2022-06-29 14:35:19 -04:00
|
|
|
# Print the coverage report with a fail-under threshold of 95.
.PHONY: check-coverage
check-coverage:
	coverage report --fail-under=95
|
2023-03-14 13:40:01 -07:00
|
|
|
|
2023-05-24 17:29:35 -05:00
|
|
|
## check-deps: check consistency of dependencies
|
|
|
|
# Delegate to scripts/consistent-deps.sh; the target fails when that script exits non-zero.
.PHONY: check-deps
check-deps:
	scripts/consistent-deps.sh
|
|
|
|
|
2023-03-14 13:40:01 -07:00
|
|
|
##########
|
|
|
|
# Docker #
|
|
|
|
##########
|
|
|
|
|
|
|
|
# Docker targets are provided for convenience only and are not required in a standard development environment
|
|
|
|
|
2023-04-06 00:34:07 -07:00
|
|
|
# Image name and tag used by the docker-* targets. Assigned with ?= so a value
# from the environment or the command line takes precedence.
DOCKER_IMAGE ?= unstructured:dev
|
2023-03-29 00:02:39 -07:00
|
|
|
|
2023-03-14 13:40:01 -07:00
|
|
|
# Run scripts/docker-build.sh with PIP_VERSION and DOCKER_IMAGE_NAME (taken
# from DOCKER_IMAGE) set in its environment.
.PHONY: docker-build
docker-build:
	PIP_VERSION=$(PIP_VERSION) DOCKER_IMAGE_NAME=$(DOCKER_IMAGE) ./scripts/docker-build.sh
|
2023-03-14 13:40:01 -07:00
|
|
|
|
|
|
|
# Start an interactive, auto-removed container from DOCKER_IMAGE, running the
# image's default command.
.PHONY: docker-start-bash
docker-start-bash:
	docker run -ti --rm $(DOCKER_IMAGE)
|
2023-03-21 13:46:09 -07:00
|
|
|
|
|
|
|
# Run pytest on test_unstructured inside an auto-removed container from
# DOCKER_IMAGE, with the local test_unstructured and test_unstructured_ingest
# directories bind-mounted under /home. When TEST_NAME is non-empty it is
# passed to pytest as `-k TEST_NAME`.
.PHONY: docker-test
docker-test:
	docker run --rm \
		-v $(CURRENT_DIR)/test_unstructured:/home/test_unstructured \
		-v $(CURRENT_DIR)/test_unstructured_ingest:/home/test_unstructured_ingest \
		$(DOCKER_IMAGE) \
		bash -c "pytest $(if $(TEST_NAME),-k $(TEST_NAME),) test_unstructured"
|
2023-03-30 13:23:30 -07:00
|
|
|
|
|
|
|
# Run scripts/docker-smoke-test.sh with DOCKER_IMAGE set in its environment.
.PHONY: docker-smoke-test
docker-smoke-test:
	DOCKER_IMAGE=$(DOCKER_IMAGE) ./scripts/docker-smoke-test.sh
|