mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-08-15 20:27:37 +00:00
refactor: remove download packages step (#3225)
This PR removes the download packages step, since all of those packages are already installed in the base images. It also updates the base `wolfi` image because the original base image can no longer be found: https://github.com/Unstructured-IO/unstructured/actions/runs/9555654898/job/26339587945
This commit is contained in:
parent
77a9e1b54d
commit
b47e6e9fdc
1
.github/workflows/ci.yml
vendored
1
.github/workflows/ci.yml
vendored
@ -499,7 +499,6 @@ jobs:
|
||||
- name: Test Dockerfile
|
||||
run: |
|
||||
echo "UNS_API_KEY=${{ secrets.UNS_API_KEY }}" > uns_test_env_file
|
||||
make docker-dl-packages
|
||||
make docker-build
|
||||
make docker-test CI=true UNSTRUCTURED_INCLUDE_DEBUG_METADATA=true
|
||||
- name: Scan image
|
||||
|
1
.github/workflows/docker-publish.yml
vendored
1
.github/workflows/docker-publish.yml
vendored
@ -47,7 +47,6 @@ jobs:
|
||||
password: ${{ secrets.QUAY_IO_ROBOT_TOKEN }}
|
||||
- name: Build images
|
||||
run: |
|
||||
make docker-dl-packages
|
||||
ARCH=$(cut -d "/" -f2 <<< ${{ matrix.docker-platform }})
|
||||
DOCKER_BUILDKIT=1 docker buildx build --platform=$ARCH --load \
|
||||
-f Dockerfile-$ARCH \
|
||||
|
3
.gitignore
vendored
3
.gitignore
vendored
@ -204,6 +204,3 @@ examples/**/output/
|
||||
|
||||
outputdiff.txt
|
||||
metricsdiff.txt
|
||||
|
||||
# APK packages for the docker build
|
||||
docker-packages/*
|
||||
|
@ -21,7 +21,7 @@
|
||||
|
||||
### Fixes
|
||||
|
||||
* **Remove deprecated `overwrite_schema` kwarg from Delta Table connector.**. The `overwrite_schema` kwarg is deprecated in `deltalake>=0.18.0`. `schema_mode=` should be used now instead. `schema_mode="overwrite"` is equivalent to `overwrite_schema=True` and `schema_mode="merge"` is equivalent to `overwrite_schema="False"`. `schema_mode` defaults to `None`. You can also now specify `engine`, which defaults to `"pyarrow"`. You need to specify `enginer="rust"` to use `"schema_mode"`.
|
||||
* **Remove deprecated `overwrite_schema` kwarg from Delta Table connector.** The `overwrite_schema` kwarg is deprecated in `deltalake>=0.18.0`. `schema_mode=` should be used now instead. `schema_mode="overwrite"` is equivalent to `overwrite_schema=True` and `schema_mode="merge"` is equivalent to `overwrite_schema="False"`. `schema_mode` defaults to `None`. You can also now specify `engine`, which defaults to `"pyarrow"`. You need to specify `engine="rust"` to use `"schema_mode"`.
|
||||
* **Fix passing parameters to python-client** - Remove parsing list arguments to strings in passing arguments to python-client in Ingest workflow and `partition_via_api`
|
||||
* **table metric bug fix** get_element_level_alignment() now finds all the matched indices in predicted table data instead of only returning the first match in the case of multiple matches for the same gt string.
|
||||
* **fsspec connector path/permissions bug** V2 fsspec connectors were failing when defined relative filepaths had a leading slash. This strips that slash to guarantee the relative path never has it.
|
||||
|
@ -1,4 +1,4 @@
|
||||
FROM quay.io/unstructured-io/base-images:wolfi-base@sha256:6c00a236c648ffdaf196ccbc446f5c6cc9eb4e3ab9e437178abcfac710b2b373
|
||||
FROM quay.io/unstructured-io/base-images:wolfi-base@sha256:863fd5b87e780dacec62b97c2db2aeda7f770fcf9b045b29f53ec1ddbe607b4d
|
||||
|
||||
USER root
|
||||
|
||||
|
4
Makefile
4
Makefile
@ -460,10 +460,6 @@ DOCKER_IMAGE ?= unstructured:dev
|
||||
docker-build:
|
||||
PIP_VERSION=${PIP_VERSION} DOCKER_IMAGE_NAME=${DOCKER_IMAGE} ./scripts/docker-build.sh
|
||||
|
||||
.PHONY: docker-dl-packages
|
||||
docker-dl-packages:
|
||||
@scripts/docker-dl-packages.sh
|
||||
|
||||
.PHONY: docker-start-bash
|
||||
docker-start-bash:
|
||||
docker run -ti --rm ${DOCKER_IMAGE}
|
||||
|
@ -1,23 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
files=(
|
||||
"libreoffice-7.6.5-r0.apk"
|
||||
"libreoffice-24-24.2.4.1-r0.67f8e014.apk"
|
||||
"openjpeg-2.5.0-r0.apk"
|
||||
"poppler-23.09.0-r0.apk"
|
||||
"leptonica-1.83.0-r0.apk"
|
||||
"pandoc-3.1.8-r0.apk"
|
||||
"tesseract-5.3.2-r0.apk"
|
||||
"nltk_data.tgz"
|
||||
|
||||
)
|
||||
|
||||
directory="docker-packages"
|
||||
mkdir -p "${directory}"
|
||||
|
||||
for file in "${files[@]}"; do
|
||||
echo "Downloading ${file}"
|
||||
wget "https://utic-public-cf.s3.amazonaws.com/$file" -P "$directory"
|
||||
done
|
||||
|
||||
echo "Downloads complete."
|
Loading…
x
Reference in New Issue
Block a user