diff --git a/CHANGELOG.md b/CHANGELOG.md index df14a50f9..5c08ad94e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,8 @@ -## 0.9.3-dev3 +## 0.9.3 ### Enhancements +* Pinned dependency cleanup. * Update `partition_csv` to always use `soupparser_fromstring` to parse `html text` * Update `partition_tsv` to always use `soupparser_fromstring` to parse `html text` * Add `metadata.section` to capture epub table of contents data diff --git a/MANIFEST.in b/MANIFEST.in index f29cd9b8a..d25fb2c79 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,17 +1,21 @@ include requirements/base.in include requirements/huggingface.in -include requirements/local-inference.in include requirements/ingest-s3.in -include requirements/ingest-gcs.in -include requirements/ingest-dropbox.in include requirements/ingest-azure.in include requirements/ingest-discord.in include requirements/ingest-github.in include requirements/ingest-gitlab.in include requirements/ingest-reddit.in +include requirements/ingest-notion.in include requirements/ingest-slack.in include requirements/ingest-wikipedia.in include requirements/ingest-google-drive.in -include requirements/ingest-outlook.in -include requirements/ingest-onedrive.in +include requirements/ingest-gcs.in +include requirements/ingest-elasticsearch.in +include requirements/ingest-dropbox.in include requirements/ingest-box.in +include requirements/ingest-onedrive.in +include requirements/ingest-outlook.in +include requirements/ingest-confluence.in +include requirements/ingest-airtable.in +include requirements/ingest-sharepoint.in diff --git a/Makefile b/Makefile index 01b01f5ce..c3f89fd9c 100644 --- a/Makefile +++ b/Makefile @@ -165,6 +165,10 @@ install-ingest-confluence: install-ingest-airtable: python3 -m pip install -r requirements/ingest-airtable.txt +.PHONY: install-ingest-sharepoint +install-ingest-sharepoint: + python3 -m pip install -r requirements/ingest-sharepoint.txt + .PHONY: install-unstructured-inference install-unstructured-inference: python3 -m pip install -r requirements/local-inference.txt @@ -220,6 +224,8 @@ pip-compile: pip-compile --upgrade requirements/ingest-outlook.in pip-compile --upgrade requirements/ingest-confluence.in pip-compile --upgrade requirements/ingest-airtable.in + pip-compile --upgrade requirements/ingest-sharepoint.in + pip-compile --upgrade requirements/ingest-notion.in ## install-project-local: install unstructured into your local python environment .PHONY: install-project-local diff --git a/requirements/constraints.in b/requirements/constraints.in index 98d8bab4a..e35fcbfd5 100644 --- a/requirements/constraints.in +++ b/requirements/constraints.in @@ -22,5 +22,6 @@ IPython<8.13 # NOTE(robinson) - See this issue here # https://github.com/facebookresearch/detectron2/issues/5010 Pillow<10.0.0 - -cryptography==41.0.2 +# NOTE(alan) Pinned to avoid error that occurs with 2.4.3: +# AttributeError: 'ResourcePath' object has no attribute 'collection' +Office365-REST-Python-Client<2.4.3 diff --git a/requirements/dev.txt b/requirements/dev.txt index 3e06aea10..b3a77cf93 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -68,7 +68,7 @@ defusedxml==0.7.1 # via nbconvert distlib==0.3.7 # via virtualenv -exceptiongroup==1.1.2 +exceptiongroup==1.1.3 # via # -c requirements/test.txt # anyio diff --git a/requirements/extra-pdf-image.in b/requirements/extra-pdf-image.in index bece5cda2..c483952e8 100644 --- a/requirements/extra-pdf-image.in +++ b/requirements/extra-pdf-image.in @@ -7,5 +7,3 @@ pdfminer.six # https://github.com/facebookresearch/detectron2/issues/5010 Pillow<10 unstructured-inference==0.5.9 -# consistency with other ingest-*.in -cryptography==41.0.2 \ No newline at end of file diff --git a/requirements/extra-pdf-image.txt b/requirements/extra-pdf-image.txt index a6d861384..923b0b366 100644 --- a/requirements/extra-pdf-image.txt +++ b/requirements/extra-pdf-image.txt @@ -22,11 +22,8 @@ coloredlogs==15.0.1 # via onnxruntime contourpy==1.1.0 # via matplotlib -cryptography==41.0.2 - # via - # -c requirements/constraints.in - # -r requirements/extra-pdf-image.in - # pdfminer-six +cryptography==41.0.3 + # via pdfminer-six cycler==0.11.0 # via matplotlib effdet==0.4.1 @@ -127,7 +124,7 @@ protobuf==4.23.4 # via # -c requirements/constraints.in # onnxruntime -pycocotools==2.0.6 +pycocotools==2.0.7 # via effdet pycparser==2.21 # via cffi diff --git a/requirements/ingest-azure.in b/requirements/ingest-azure.in index 31bdb4335..d42acf96a 100644 --- a/requirements/ingest-azure.in +++ b/requirements/ingest-azure.in @@ -2,5 +2,3 @@ -c base.txt adlfs fsspec -# consistency with other ingest-*.in -cryptography==41.0.2 \ No newline at end of file diff --git a/requirements/ingest-azure.txt b/requirements/ingest-azure.txt index a9408e4a7..073e485ac 100644 --- a/requirements/ingest-azure.txt +++ b/requirements/ingest-azure.txt @@ -39,10 +39,8 @@ charset-normalizer==3.2.0 # -c requirements/base.txt # aiohttp # requests -cryptography==41.0.2 +cryptography==41.0.3 # via - # -c requirements/constraints.in - # -r requirements/ingest-azure.in # azure-identity # azure-storage-blob # msal diff --git a/requirements/ingest-box.in b/requirements/ingest-box.in index 2072137d7..58bbb4a5c 100644 --- a/requirements/ingest-box.in +++ b/requirements/ingest-box.in @@ -2,5 +2,3 @@ -c base.txt boxfs fsspec -# consistency with other ingest-*.in -cryptography==41.0.2 \ No newline at end of file diff --git a/requirements/ingest-box.txt b/requirements/ingest-box.txt index 9f0e00845..cf91dd314 100644 --- a/requirements/ingest-box.txt +++ b/requirements/ingest-box.txt @@ -21,11 +21,8 @@ charset-normalizer==3.2.0 # via # -c requirements/base.txt # requests -cryptography==41.0.2 - # via - # -c requirements/constraints.in - # -r requirements/ingest-box.in - # boxsdk +cryptography==41.0.3 + # via boxsdk fsspec==2023.6.0 # via # -r requirements/ingest-box.in diff --git a/requirements/ingest-gcs.txt b/requirements/ingest-gcs.txt index 328bd62cf..0ca5bf852 100644 --- a/requirements/ingest-gcs.txt +++ b/requirements/ingest-gcs.txt @@ -74,7 +74,6 @@ protobuf==4.23.4 # via # -c requirements/constraints.in # google-api-core - # googleapis-common-protos pyasn1==0.5.0 # via # pyasn1-modules diff --git a/requirements/ingest-github.in b/requirements/ingest-github.in index 0f835a8b8..fe6f65a92 100644 --- a/requirements/ingest-github.in +++ b/requirements/ingest-github.in @@ -1,7 +1,4 @@ -c constraints.in -c base.txt # NOTE - pygithub==1.58.0 fails due to https://github.com/PyGithub/PyGithub/issues/2436 -# In the future, we can update this to pygithub>1.58.0 -pygithub==1.58.2 -# consistency with other ingest-*.in -cryptography==41.0.2 +pygithub>1.58.0 diff --git a/requirements/ingest-github.txt b/requirements/ingest-github.txt index e381208c5..575360470 100644 --- a/requirements/ingest-github.txt +++ b/requirements/ingest-github.txt @@ -17,11 +17,8 @@ charset-normalizer==3.2.0 # via # -c requirements/base.txt # requests -cryptography==41.0.2 - # via - # -c requirements/constraints.in - # -r requirements/ingest-github.in - # pyjwt +cryptography==41.0.3 + # via pyjwt deprecated==1.2.14 # via pygithub idna==3.4 @@ -30,7 +27,7 @@ idna==3.4 # requests pycparser==2.21 # via cffi -pygithub==1.58.2 +pygithub==1.59.1 # via -r requirements/ingest-github.in pyjwt[crypto]==2.8.0 # via pygithub diff --git a/requirements/ingest-notion.txt b/requirements/ingest-notion.txt index c82af6a7c..ca9e500d4 100644 --- a/requirements/ingest-notion.txt +++ b/requirements/ingest-notion.txt @@ -12,7 +12,7 @@ certifi==2023.7.22 # -c requirements/constraints.in # httpcore # httpx -exceptiongroup==1.1.2 +exceptiongroup==1.1.3 # via anyio h11==0.14.0 # via httpcore diff --git a/requirements/ingest-onedrive.in b/requirements/ingest-onedrive.in index a4564d16e..d5cd7d03c 100644 --- a/requirements/ingest-onedrive.in +++ b/requirements/ingest-onedrive.in @@ -1,5 +1,4 @@ -c constraints.in -c base.txt msal -Office365-REST-Python-Client==2.4.2 -cryptography==41.0.2 +Office365-REST-Python-Client<2.4.3 diff --git a/requirements/ingest-onedrive.txt b/requirements/ingest-onedrive.txt index ec848438c..c9c0c921d 100644 --- a/requirements/ingest-onedrive.txt +++ b/requirements/ingest-onedrive.txt @@ -15,10 +15,8 @@ charset-normalizer==3.2.0 # via # -c requirements/base.txt # requests -cryptography==41.0.2 +cryptography==41.0.3 # via - # -c requirements/constraints.in - # -r requirements/ingest-onedrive.in # msal # pyjwt idna==3.4 @@ -30,7 +28,9 @@ msal==1.23.0 # -r requirements/ingest-onedrive.in # office365-rest-python-client office365-rest-python-client==2.4.2 - # via -r requirements/ingest-onedrive.in + # via + # -c requirements/constraints.in + # -r requirements/ingest-onedrive.in pycparser==2.21 # via cffi pyjwt[crypto]==2.8.0 diff --git a/requirements/ingest-outlook.in b/requirements/ingest-outlook.in index a4564d16e..d5cd7d03c 100644 --- a/requirements/ingest-outlook.in +++ b/requirements/ingest-outlook.in @@ -1,5 +1,4 @@ -c constraints.in -c base.txt msal -Office365-REST-Python-Client==2.4.2 -cryptography==41.0.2 +Office365-REST-Python-Client<2.4.3 diff --git a/requirements/ingest-outlook.txt b/requirements/ingest-outlook.txt index f3e418fcc..869b24d46 100644 --- a/requirements/ingest-outlook.txt +++ b/requirements/ingest-outlook.txt @@ -15,10 +15,8 @@ charset-normalizer==3.2.0 # via # -c requirements/base.txt # requests -cryptography==41.0.2 +cryptography==41.0.3 # via - # -c requirements/constraints.in - # -r requirements/ingest-outlook.in # msal # pyjwt idna==3.4 @@ -30,7 +28,9 @@ msal==1.23.0 # -r requirements/ingest-outlook.in # office365-rest-python-client office365-rest-python-client==2.4.2 - # via -r requirements/ingest-outlook.in + # via + # -c requirements/constraints.in + # -r requirements/ingest-outlook.in pycparser==2.21 # via cffi pyjwt[crypto]==2.8.0 diff --git a/requirements/ingest-sharepoint.in b/requirements/ingest-sharepoint.in index 869e0e91c..d5cd7d03c 100644 --- a/requirements/ingest-sharepoint.in +++ b/requirements/ingest-sharepoint.in @@ -1,6 +1,4 @@ -c constraints.in -c base.txt -msal==1.23.0 -Office365-REST-Python-Client==2.4.2 -pyjwt==2.8.0 -cryptography==41.0.2 \ No newline at end of file +msal +Office365-REST-Python-Client<2.4.3 diff --git a/requirements/ingest-sharepoint.txt b/requirements/ingest-sharepoint.txt index 4674632e6..075349801 100644 --- a/requirements/ingest-sharepoint.txt +++ b/requirements/ingest-sharepoint.txt @@ -15,9 +15,8 @@ charset-normalizer==3.2.0 # via # -c requirements/base.txt # requests -cryptography==41.0.2 +cryptography==41.0.3 # via - # -r requirements/ingest-sharepoint.in # msal # pyjwt idna==3.4 @@ -29,13 +28,13 @@ msal==1.23.0 # -r requirements/ingest-sharepoint.in # office365-rest-python-client office365-rest-python-client==2.4.2 - # via -r requirements/ingest-sharepoint.in + # via + # -c requirements/constraints.in + # -r requirements/ingest-sharepoint.in pycparser==2.21 # via cffi pyjwt[crypto]==2.8.0 - # via - # -r requirements/ingest-sharepoint.in - # msal + # via msal pytz==2023.3 # via office365-rest-python-client requests==2.31.0 diff --git a/requirements/test.txt b/requirements/test.txt index e7b286b87..09a564b21 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -26,7 +26,7 @@ coverage[toml]==7.3.0 # via # -r requirements/test.in # pytest-cov -exceptiongroup==1.1.2 +exceptiongroup==1.1.3 # via pytest flake8==6.1.0 # via -r requirements/test.in diff --git a/setup.py b/setup.py index 75fedc607..d0bbf3a89 100644 --- a/setup.py +++ b/setup.py @@ -141,6 +141,7 @@ setup( "outlook": load_requirements("requirements/ingest-outlook.in"), "confluence": load_requirements("requirements/ingest-confluence.in"), "airtable": load_requirements("requirements/ingest-airtable.in"), + "sharepoint": load_requirements("requirements/ingest-sharepoint.in"), # Legacy extra requirements "huggingface": load_requirements("requirements/huggingface.in"), "local-inference": all_doc_reqs, diff --git a/unstructured/__version__.py b/unstructured/__version__.py index 8b0878a7b..a909ec339 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.9.3-dev3" # pragma: no cover +__version__ = "0.9.3" # pragma: no cover