mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-06-27 02:30:08 +00:00
chore: add tests for docker (#373)
This commit is contained in:
parent
3c95b975fe
commit
a9da858fa3
31
.github/workflows/ci.yml
vendored
31
.github/workflows/ci.yml
vendored
@ -1,7 +1,7 @@
|
|||||||
name: CI
|
name: CI
|
||||||
|
|
||||||
on:
|
on:
|
||||||
# NOTE(robinson) - We are limiting when we run CI avoid exceeding our 2,000 min/month limt.
|
# NOTE(robinson) - We are limiting when we run CI avoid exceeding our 2,000 min/month limit.
|
||||||
# We can switch to running on push if we make this repo public or are fine with
|
# We can switch to running on push if we make this repo public or are fine with
|
||||||
# paying for CI minutes.
|
# paying for CI minutes.
|
||||||
push:
|
push:
|
||||||
@ -128,3 +128,32 @@ jobs:
|
|||||||
|
|
||||||
- if: steps.changes.outputs.src == 'true' && github.ref != 'refs/heads/main'
|
- if: steps.changes.outputs.src == 'true' && github.ref != 'refs/heads/main'
|
||||||
uses: dangoslen/changelog-enforcer@v3
|
uses: dangoslen/changelog-enforcer@v3
|
||||||
|
|
||||||
|
# TODO - figure out best practice for caching docker images
|
||||||
|
# (Using the virtualenv to get pytest)
|
||||||
|
test_dockerfile:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
needs: [ setup, lint ]
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
- uses: actions/cache@v3
|
||||||
|
id: virtualenv-cache
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
.venv
|
||||||
|
nltk_data
|
||||||
|
key: unstructured-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('requirements/*.txt') }}
|
||||||
|
- name: Set up Python ${{ matrix.python-version }}
|
||||||
|
uses: actions/setup-python@v4
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.python-version }}
|
||||||
|
- name: Setup virtual environment (no cache hit)
|
||||||
|
if: steps.virtualenv-cache.outputs.cache-hit != 'true'
|
||||||
|
run: |
|
||||||
|
python${{ matrix.python-version }} -m venv .venv
|
||||||
|
- name: Test Dockerfile
|
||||||
|
run: |
|
||||||
|
source .venv/bin/activate
|
||||||
|
make docker-build
|
||||||
|
make docker-test
|
||||||
|
|
||||||
|
8
Makefile
8
Makefile
@ -1,5 +1,6 @@
|
|||||||
PACKAGE_NAME := unstructured
|
PACKAGE_NAME := unstructured
|
||||||
PIP_VERSION := 22.2.1
|
PIP_VERSION := 22.2.1
|
||||||
|
CURRENT_DIR := $(shell pwd)
|
||||||
|
|
||||||
|
|
||||||
.PHONY: help
|
.PHONY: help
|
||||||
@ -185,7 +186,6 @@ check-coverage:
|
|||||||
|
|
||||||
# Docker targets are provided for convenience only and are not required in a standard development environment
|
# Docker targets are provided for convenience only and are not required in a standard development environment
|
||||||
|
|
||||||
|
|
||||||
.PHONY: docker-build
|
.PHONY: docker-build
|
||||||
docker-build:
|
docker-build:
|
||||||
PIP_VERSION=${PIP_VERSION} ./scripts/docker-build.sh
|
PIP_VERSION=${PIP_VERSION} ./scripts/docker-build.sh
|
||||||
@ -193,3 +193,9 @@ docker-build:
|
|||||||
.PHONY: docker-start-bash
|
.PHONY: docker-start-bash
|
||||||
docker-start-bash:
|
docker-start-bash:
|
||||||
docker run --platform linux/amd64 -ti --rm unstructured-dev:latest
|
docker run --platform linux/amd64 -ti --rm unstructured-dev:latest
|
||||||
|
|
||||||
|
.PHONY: docker-test
|
||||||
|
docker-test:
|
||||||
|
docker run --platform linux/amd64 --rm \
|
||||||
|
-v ${CURRENT_DIR}/test_unstructured:/home/test_unstructured unstructured-dev:latest \
|
||||||
|
bash -c "pytest test_unstructured"
|
||||||
|
@ -10,6 +10,7 @@ from unstructured.file_utils.file_conversion import convert_file_to_text
|
|||||||
DIRECTORY = pathlib.Path(__file__).parent.resolve()
|
DIRECTORY = pathlib.Path(__file__).parent.resolve()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.xfail(reason="Requirements mismatch, should only fail in docker test")
|
||||||
def test_convert_file_to_text():
|
def test_convert_file_to_text():
|
||||||
filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "winter-sports.epub")
|
filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "winter-sports.epub")
|
||||||
html_text = convert_file_to_text(filename, source_format="epub", target_format="html")
|
html_text = convert_file_to_text(filename, source_format="epub", target_format="html")
|
||||||
|
@ -266,6 +266,7 @@ def test_auto_partition_pptx_from_filename():
|
|||||||
assert elements[0].metadata.filename == filename
|
assert elements[0].metadata.filename == filename
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.xfail(reason="Requirements mismatch, should only fail in docker test")
|
||||||
def test_auto_partition_ppt_from_filename():
|
def test_auto_partition_ppt_from_filename():
|
||||||
filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake-power-point.ppt")
|
filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake-power-point.ppt")
|
||||||
elements = partition(filename=filename)
|
elements = partition(filename=filename)
|
||||||
@ -279,6 +280,7 @@ def test_auto_with_page_breaks():
|
|||||||
assert PageBreak() in elements
|
assert PageBreak() in elements
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.xfail(reason="Requirements mismatch, should only fail in docker test")
|
||||||
def test_auto_partition_epub_from_filename():
|
def test_auto_partition_epub_from_filename():
|
||||||
filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "winter-sports.epub")
|
filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "winter-sports.epub")
|
||||||
elements = partition(filename=filename)
|
elements = partition(filename=filename)
|
||||||
@ -286,6 +288,7 @@ def test_auto_partition_epub_from_filename():
|
|||||||
assert elements[0].text.startswith("The Project Gutenberg eBook of Winter Sports")
|
assert elements[0].text.startswith("The Project Gutenberg eBook of Winter Sports")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.xfail(reason="Requirements mismatch, should only fail in docker test")
|
||||||
def test_auto_partition_epub_from_file():
|
def test_auto_partition_epub_from_file():
|
||||||
filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "winter-sports.epub")
|
filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "winter-sports.epub")
|
||||||
with open(filename, "rb") as f:
|
with open(filename, "rb") as f:
|
||||||
|
@ -1,11 +1,14 @@
|
|||||||
import os
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
from unstructured.partition.epub import partition_epub
|
from unstructured.partition.epub import partition_epub
|
||||||
|
|
||||||
DIRECTORY = pathlib.Path(__file__).parent.resolve()
|
DIRECTORY = pathlib.Path(__file__).parent.resolve()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.xfail(reason="Requirements mismatch, should only fail in docker test")
|
||||||
def test_partition_epub_from_filename():
|
def test_partition_epub_from_filename():
|
||||||
filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "winter-sports.epub")
|
filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "winter-sports.epub")
|
||||||
elements = partition_epub(filename=filename)
|
elements = partition_epub(filename=filename)
|
||||||
@ -13,6 +16,7 @@ def test_partition_epub_from_filename():
|
|||||||
assert elements[0].text.startswith("The Project Gutenberg eBook of Winter Sports")
|
assert elements[0].text.startswith("The Project Gutenberg eBook of Winter Sports")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.xfail(reason="Requirements mismatch, should only fail in docker test")
|
||||||
def test_partition_epub_from_file():
|
def test_partition_epub_from_file():
|
||||||
filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "winter-sports.epub")
|
filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "winter-sports.epub")
|
||||||
with open(filename, "rb") as f:
|
with open(filename, "rb") as f:
|
||||||
|
@ -16,7 +16,10 @@ test_files = [
|
|||||||
"fake-html.html",
|
"fake-html.html",
|
||||||
"fake.doc",
|
"fake.doc",
|
||||||
"fake-email.eml",
|
"fake-email.eml",
|
||||||
"fake-power-point.ppt",
|
pytest.param(
|
||||||
|
"fake-power-point.ppt",
|
||||||
|
marks=pytest.mark.xfail(reason="Requirements mismatch, should only fail in docker test"),
|
||||||
|
),
|
||||||
"fake.docx",
|
"fake.docx",
|
||||||
"fake-power-point.pptx",
|
"fake-power-point.pptx",
|
||||||
]
|
]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user