Fix json bytes content type detection (#3941)

Fixes the order of content-type detection strategies for byte-encoded JSON.

Example
```
json_bytes = json.dumps([{"example": "data"}]).encode("utf-8")
file_buffer = io.BytesIO(json_bytes)
detect_filetype(file=file_buffer, metadata_file_path="filename.pdf") 
```

Before
PDF

Now
JSON
This commit is contained in:
Pluto 2025-03-07 11:33:33 +01:00 committed by GitHub
parent 961c8d5b11
commit 74b0647aa2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
21 changed files with 317 additions and 260 deletions

View File

@ -1,3 +1,14 @@
## 0.16.25
### Enhancements
### Features
### Fixes
- **Fixes filetype detection for JSON passed as byte streams** - File-type detection now prioritizes the magic MIME-type prediction over the file extension
## 0.16.24 ## 0.16.24
### Enhancements ### Enhancements
@ -14,6 +25,7 @@
### Fixes ### Fixes
## 0.16.23 ## 0.16.23
### Enhancements ### Enhancements

View File

@ -2,14 +2,14 @@
# This file is autogenerated by pip-compile with Python 3.9 # This file is autogenerated by pip-compile with Python 3.9
# by the following command: # by the following command:
# #
# pip-compile base.in # pip-compile ./base.in
# #
anyio==4.8.0 anyio==4.8.0
# via httpx # via httpx
backoff==2.2.1 backoff==2.2.1
# via -r base.in # via -r ./base.in
beautifulsoup4==4.13.3 beautifulsoup4==4.13.3
# via -r base.in # via -r ./base.in
certifi==2025.1.31 certifi==2025.1.31
# via # via
# httpcore # httpcore
@ -19,7 +19,7 @@ certifi==2025.1.31
cffi==1.17.1 cffi==1.17.1
# via cryptography # via cryptography
chardet==5.2.0 chardet==5.2.0
# via -r base.in # via -r ./base.in
charset-normalizer==3.4.1 charset-normalizer==3.4.1
# via # via
# requests # requests
@ -28,24 +28,24 @@ click==8.1.8
# via # via
# nltk # nltk
# python-oxmsg # python-oxmsg
cryptography==44.0.1 cryptography==44.0.2
# via unstructured-client # via unstructured-client
dataclasses-json==0.6.7 dataclasses-json==0.6.7
# via # via
# -r base.in # -r ./base.in
# unstructured-client # unstructured-client
deepdiff==8.2.0 deepdiff==8.3.0
# via unstructured-client # via unstructured-client
emoji==2.14.1 emoji==2.14.1
# via -r base.in # via -r ./base.in
exceptiongroup==1.2.2 exceptiongroup==1.2.2
# via anyio # via anyio
filetype==1.2.0 filetype==1.2.0
# via -r base.in # via -r ./base.in
h11==0.14.0 h11==0.14.0
# via httpcore # via httpcore
html5lib==1.1 html5lib==1.1
# via -r base.in # via -r ./base.in
httpcore==1.0.7 httpcore==1.0.7
# via httpx # via httpx
httpx==0.28.1 httpx==0.28.1
@ -61,9 +61,9 @@ joblib==1.4.2
jsonpath-python==1.0.6 jsonpath-python==1.0.6
# via unstructured-client # via unstructured-client
langdetect==1.0.9 langdetect==1.0.9
# via -r base.in # via -r ./base.in
lxml==5.3.1 lxml==5.3.1
# via -r base.in # via -r ./base.in
marshmallow==3.26.1 marshmallow==3.26.1
# via # via
# dataclasses-json # dataclasses-json
@ -75,9 +75,9 @@ mypy-extensions==1.0.0
nest-asyncio==1.6.0 nest-asyncio==1.6.0
# via unstructured-client # via unstructured-client
nltk==3.9.1 nltk==3.9.1
# via -r base.in # via -r ./base.in
numpy==1.26.4 numpy==1.26.4
# via -r base.in # via -r ./base.in
olefile==0.47 olefile==0.47
# via python-oxmsg # via python-oxmsg
orderly-set==5.3.0 orderly-set==5.3.0
@ -87,26 +87,26 @@ packaging==24.2
# marshmallow # marshmallow
# unstructured-client # unstructured-client
psutil==7.0.0 psutil==7.0.0
# via -r base.in # via -r ./base.in
pycparser==2.22 pycparser==2.22
# via cffi # via cffi
pypdf==5.3.0 pypdf==5.3.1
# via unstructured-client # via unstructured-client
python-dateutil==2.9.0.post0 python-dateutil==2.9.0.post0
# via unstructured-client # via unstructured-client
python-iso639==2025.2.18 python-iso639==2025.2.18
# via -r base.in # via -r ./base.in
python-magic==0.4.27 python-magic==0.4.27
# via -r base.in # via -r ./base.in
python-oxmsg==0.0.2 python-oxmsg==0.0.2
# via -r base.in # via -r ./base.in
rapidfuzz==3.12.1 rapidfuzz==3.12.2
# via -r base.in # via -r ./base.in
regex==2024.11.6 regex==2024.11.6
# via nltk # via nltk
requests==2.32.3 requests==2.32.3
# via # via
# -r base.in # -r ./base.in
# requests-toolbelt # requests-toolbelt
# unstructured-client # unstructured-client
requests-toolbelt==1.0.0 requests-toolbelt==1.0.0
@ -123,11 +123,11 @@ soupsieve==2.6
# via beautifulsoup4 # via beautifulsoup4
tqdm==4.67.1 tqdm==4.67.1
# via # via
# -r base.in # -r ./base.in
# nltk # nltk
typing-extensions==4.12.2 typing-extensions==4.12.2
# via # via
# -r base.in # -r ./base.in
# anyio # anyio
# beautifulsoup4 # beautifulsoup4
# pypdf # pypdf
@ -140,14 +140,14 @@ typing-inspect==0.9.0
# unstructured-client # unstructured-client
unstructured-client==0.25.9 unstructured-client==0.25.9
# via # via
# -c ./deps/constraints.txt # -c ././deps/constraints.txt
# -r base.in # -r ./base.in
urllib3==1.26.20 urllib3==1.26.20
# via # via
# -c ./deps/constraints.txt # -c ././deps/constraints.txt
# requests # requests
# unstructured-client # unstructured-client
webencodings==0.5.1 webencodings==0.5.1
# via html5lib # via html5lib
wrapt==1.17.2 wrapt==1.17.2
# via -r base.in # via -r ./base.in

View File

@ -6,6 +6,8 @@
# we are using v3 client https://weaviate.io/developers/weaviate/client-libraries/python/python_v3 # we are using v3 client https://weaviate.io/developers/weaviate/client-libraries/python/python_v3
weaviate-client>=3.26.7,<4.0.0 weaviate-client>=3.26.7,<4.0.0
# TODO: Constraint due to multiple versions being installed during pip-compile # TODO: Constraint due to multiple versions being installed during pip-compile
protobuf>=6.30.0
# TODO: Constraint due to multiple versions being installed during pip-compile
grpcio>=1.65.5 grpcio>=1.65.5
# TODO: Pinned in transformers package, remove when that gets updated (https://github.com/huggingface/transformers/blob/main/setup.py) # TODO: Pinned in transformers package, remove when that gets updated (https://github.com/huggingface/transformers/blob/main/setup.py)
tokenizers>=0.21,<0.22 tokenizers>=0.21,<0.22

View File

@ -2,7 +2,7 @@
# This file is autogenerated by pip-compile with Python 3.9 # This file is autogenerated by pip-compile with Python 3.9
# by the following command: # by the following command:
# #
# pip-compile dev.in # pip-compile ./dev.in
# #
build==1.2.2.post1 build==1.2.2.post1
# via pip-tools # via pip-tools
@ -10,48 +10,48 @@ cfgv==3.4.0
# via pre-commit # via pre-commit
click==8.1.8 click==8.1.8
# via # via
# -c base.txt # -c ./base.txt
# -c test.txt # -c ./test.txt
# pip-tools # pip-tools
distlib==0.3.9 distlib==0.3.9
# via virtualenv # via virtualenv
filelock==3.17.0 filelock==3.17.0
# via virtualenv # via virtualenv
identify==2.6.7 identify==2.6.8
# via pre-commit # via pre-commit
importlib-metadata==8.6.1 importlib-metadata==8.6.1
# via # via
# -c ./deps/constraints.txt # -c ././deps/constraints.txt
# build # build
nodeenv==1.9.1 nodeenv==1.9.1
# via pre-commit # via pre-commit
packaging==24.2 packaging==24.2
# via # via
# -c base.txt # -c ./base.txt
# -c test.txt # -c ./test.txt
# build # build
pip-tools==7.4.1 pip-tools==7.4.1
# via -r dev.in # via -r ./dev.in
platformdirs==4.3.6 platformdirs==4.3.6
# via # via
# -c test.txt # -c ./test.txt
# virtualenv # virtualenv
pre-commit==4.1.0 pre-commit==4.1.0
# via -r dev.in # via -r ./dev.in
pyproject-hooks==1.2.0 pyproject-hooks==1.2.0
# via # via
# build # build
# pip-tools # pip-tools
pyyaml==6.0.2 pyyaml==6.0.2
# via # via
# -c test.txt # -c ./test.txt
# pre-commit # pre-commit
tomli==2.2.1 tomli==2.2.1
# via # via
# -c test.txt # -c ./test.txt
# build # build
# pip-tools # pip-tools
virtualenv==20.29.2 virtualenv==20.29.3
# via pre-commit # via pre-commit
wheel==0.45.1 wheel==0.45.1
# via pip-tools # via pip-tools

View File

@ -2,23 +2,23 @@
# This file is autogenerated by pip-compile with Python 3.9 # This file is autogenerated by pip-compile with Python 3.9
# by the following command: # by the following command:
# #
# pip-compile extra-csv.in # pip-compile ./extra-csv.in
# #
numpy==1.26.4 numpy==1.26.4
# via # via
# -c base.txt # -c ./base.txt
# pandas # pandas
pandas==2.2.3 pandas==2.2.3
# via -r extra-csv.in # via -r ./extra-csv.in
python-dateutil==2.9.0.post0 python-dateutil==2.9.0.post0
# via # via
# -c base.txt # -c ./base.txt
# pandas # pandas
pytz==2025.1 pytz==2025.1
# via pandas # via pandas
six==1.17.0 six==1.17.0
# via # via
# -c base.txt # -c ./base.txt
# python-dateutil # python-dateutil
tzdata==2025.1 tzdata==2025.1
# via pandas # via pandas

View File

@ -2,15 +2,15 @@
# This file is autogenerated by pip-compile with Python 3.9 # This file is autogenerated by pip-compile with Python 3.9
# by the following command: # by the following command:
# #
# pip-compile extra-docx.in # pip-compile ./extra-docx.in
# #
lxml==5.3.1 lxml==5.3.1
# via # via
# -c base.txt # -c ./base.txt
# python-docx # python-docx
python-docx==1.1.2 python-docx==1.1.2
# via -r extra-docx.in # via -r ./extra-docx.in
typing-extensions==4.12.2 typing-extensions==4.12.2
# via # via
# -c base.txt # -c ./base.txt
# python-docx # python-docx

View File

@ -2,7 +2,7 @@
# This file is autogenerated by pip-compile with Python 3.9 # This file is autogenerated by pip-compile with Python 3.9
# by the following command: # by the following command:
# #
# pip-compile extra-epub.in # pip-compile ./extra-epub.in
# #
pypandoc==1.15 pypandoc==1.15
# via -r extra-epub.in # via -r ./extra-epub.in

View File

@ -2,13 +2,13 @@
# This file is autogenerated by pip-compile with Python 3.9 # This file is autogenerated by pip-compile with Python 3.9
# by the following command: # by the following command:
# #
# pip-compile extra-markdown.in # pip-compile ./extra-markdown.in
# #
importlib-metadata==8.6.1 importlib-metadata==8.6.1
# via # via
# -c ./deps/constraints.txt # -c ././deps/constraints.txt
# markdown # markdown
markdown==3.7 markdown==3.7
# via -r extra-markdown.in # via -r ./extra-markdown.in
zipp==3.21.0 zipp==3.21.0
# via importlib-metadata # via importlib-metadata

View File

@ -2,17 +2,17 @@
# This file is autogenerated by pip-compile with Python 3.9 # This file is autogenerated by pip-compile with Python 3.9
# by the following command: # by the following command:
# #
# pip-compile extra-odt.in # pip-compile ./extra-odt.in
# #
lxml==5.3.1 lxml==5.3.1
# via # via
# -c base.txt # -c ./base.txt
# python-docx # python-docx
pypandoc==1.15 pypandoc==1.15
# via -r extra-odt.in # via -r ./extra-odt.in
python-docx==1.1.2 python-docx==1.1.2
# via -r extra-odt.in # via -r ./extra-odt.in
typing-extensions==4.12.2 typing-extensions==4.12.2
# via # via
# -c base.txt # -c ./base.txt
# python-docx # python-docx

View File

@ -2,23 +2,23 @@
# This file is autogenerated by pip-compile with Python 3.9 # This file is autogenerated by pip-compile with Python 3.9
# by the following command: # by the following command:
# #
# pip-compile extra-paddleocr.in # pip-compile ./extra-paddleocr.in
# #
anyio==4.8.0 anyio==4.8.0
# via # via
# -c base.txt # -c ./base.txt
# httpx # httpx
astor==0.8.1 astor==0.8.1
# via paddlepaddle # via paddlepaddle
certifi==2025.1.31 certifi==2025.1.31
# via # via
# -c base.txt # -c ./base.txt
# httpcore # httpcore
# httpx # httpx
# requests # requests
charset-normalizer==3.4.1 charset-normalizer==3.4.1
# via # via
# -c base.txt # -c ./base.txt
# requests # requests
contourpy==1.3.0 contourpy==1.3.0
# via matplotlib # via matplotlib
@ -26,29 +26,29 @@ cycler==0.12.1
# via matplotlib # via matplotlib
cython==3.0.12 cython==3.0.12
# via unstructured-paddleocr # via unstructured-paddleocr
decorator==5.1.1 decorator==5.2.1
# via paddlepaddle # via paddlepaddle
exceptiongroup==1.2.2 exceptiongroup==1.2.2
# via # via
# -c base.txt # -c ./base.txt
# anyio # anyio
fonttools==4.56.0 fonttools==4.56.0
# via matplotlib # via matplotlib
h11==0.14.0 h11==0.14.0
# via # via
# -c base.txt # -c ./base.txt
# httpcore # httpcore
httpcore==1.0.7 httpcore==1.0.7
# via # via
# -c base.txt # -c ./base.txt
# httpx # httpx
httpx==0.28.1 httpx==0.28.1
# via # via
# -c base.txt # -c ./base.txt
# paddlepaddle # paddlepaddle
idna==3.10 idna==3.10
# via # via
# -c base.txt # -c ./base.txt
# anyio # anyio
# httpx # httpx
# requests # requests
@ -72,7 +72,7 @@ networkx==3.2.1
# scikit-image # scikit-image
numpy==1.26.4 numpy==1.26.4
# via # via
# -c base.txt # -c ./base.txt
# contourpy # contourpy
# imageio # imageio
# imgaug # imgaug
@ -96,12 +96,12 @@ opt-einsum==3.3.0
# via paddlepaddle # via paddlepaddle
packaging==24.2 packaging==24.2
# via # via
# -c base.txt # -c ./base.txt
# lazy-loader # lazy-loader
# matplotlib # matplotlib
# scikit-image # scikit-image
paddlepaddle==3.0.0b1 paddlepaddle==3.0.0b1
# via -r extra-paddleocr.in # via -r ./extra-paddleocr.in
pdf2image==1.17.0 pdf2image==1.17.0
# via unstructured-paddleocr # via unstructured-paddleocr
pillow==11.1.0 pillow==11.1.0
@ -113,25 +113,27 @@ pillow==11.1.0
# pdf2image # pdf2image
# scikit-image # scikit-image
# unstructured-paddleocr # unstructured-paddleocr
protobuf==5.29.3 protobuf==6.30.0
# via paddlepaddle # via
# -c ././deps/constraints.txt
# paddlepaddle
pyclipper==1.3.0.post6 pyclipper==1.3.0.post6
# via unstructured-paddleocr # via unstructured-paddleocr
pyparsing==3.2.1 pyparsing==3.2.1
# via matplotlib # via matplotlib
python-dateutil==2.9.0.post0 python-dateutil==2.9.0.post0
# via # via
# -c base.txt # -c ./base.txt
# matplotlib # matplotlib
pyyaml==6.0.2 pyyaml==6.0.2
# via unstructured-paddleocr # via unstructured-paddleocr
rapidfuzz==3.12.1 rapidfuzz==3.12.2
# via # via
# -c base.txt # -c ./base.txt
# unstructured-paddleocr # unstructured-paddleocr
requests==2.32.3 requests==2.32.3
# via # via
# -c base.txt # -c ./base.txt
# unstructured-paddleocr # unstructured-paddleocr
scikit-image==0.24.0 scikit-image==0.24.0
# via # via
@ -147,30 +149,30 @@ shapely==2.0.7
# unstructured-paddleocr # unstructured-paddleocr
six==1.17.0 six==1.17.0
# via # via
# -c base.txt # -c ./base.txt
# imgaug # imgaug
# python-dateutil # python-dateutil
sniffio==1.3.1 sniffio==1.3.1
# via # via
# -c base.txt # -c ./base.txt
# anyio # anyio
tifffile==2024.8.30 tifffile==2024.8.30
# via scikit-image # via scikit-image
tqdm==4.67.1 tqdm==4.67.1
# via # via
# -c base.txt # -c ./base.txt
# unstructured-paddleocr # unstructured-paddleocr
typing-extensions==4.12.2 typing-extensions==4.12.2
# via # via
# -c base.txt # -c ./base.txt
# anyio # anyio
# paddlepaddle # paddlepaddle
unstructured-paddleocr==2.8.1.0 unstructured-paddleocr==2.8.1.0
# via -r extra-paddleocr.in # via -r ./extra-paddleocr.in
urllib3==1.26.20 urllib3==1.26.20
# via # via
# -c ./deps/constraints.txt # -c ././deps/constraints.txt
# -c base.txt # -c ./base.txt
# requests # requests
zipp==3.21.0 zipp==3.21.0
# via importlib-resources # via importlib-resources

View File

@ -2,7 +2,7 @@
# This file is autogenerated by pip-compile with Python 3.9 # This file is autogenerated by pip-compile with Python 3.9
# by the following command: # by the following command:
# #
# pip-compile extra-pandoc.in # pip-compile ./extra-pandoc.in
# #
pypandoc==1.15 pypandoc==1.15
# via -r extra-pandoc.in # via -r ./extra-pandoc.in

View File

@ -2,39 +2,39 @@
# This file is autogenerated by pip-compile with Python 3.9 # This file is autogenerated by pip-compile with Python 3.9
# by the following command: # by the following command:
# #
# pip-compile extra-pdf-image.in # pip-compile ./extra-pdf-image.in
# #
antlr4-python3-runtime==4.9.3 antlr4-python3-runtime==4.9.3
# via omegaconf # via omegaconf
cachetools==5.5.1 cachetools==5.5.2
# via google-auth # via google-auth
certifi==2025.1.31 certifi==2025.1.31
# via # via
# -c base.txt # -c ./base.txt
# requests # requests
cffi==1.17.1 cffi==1.17.1
# via # via
# -c base.txt # -c ./base.txt
# cryptography # cryptography
charset-normalizer==3.4.1 charset-normalizer==3.4.1
# via # via
# -c base.txt # -c ./base.txt
# pdfminer-six # pdfminer-six
# requests # requests
coloredlogs==15.0.1 coloredlogs==15.0.1
# via onnxruntime # via onnxruntime
contourpy==1.3.0 contourpy==1.3.0
# via matplotlib # via matplotlib
cryptography==44.0.1 cryptography==44.0.2
# via # via
# -c base.txt # -c ./base.txt
# pdfminer-six # pdfminer-six
cycler==0.12.1 cycler==0.12.1
# via matplotlib # via matplotlib
deprecated==1.2.18 deprecated==1.2.18
# via pikepdf # via pikepdf
effdet==0.4.1 effdet==0.4.1
# via -r extra-pdf-image.in # via -r ./extra-pdf-image.in
filelock==3.17.0 filelock==3.17.0
# via # via
# huggingface-hub # huggingface-hub
@ -48,26 +48,26 @@ fsspec==2025.2.0
# via # via
# huggingface-hub # huggingface-hub
# torch # torch
google-api-core[grpc]==2.24.1 google-api-core[grpc]==2.8.0
# via google-cloud-vision # via google-cloud-vision
google-auth==2.38.0 google-auth==2.38.0
# via # via
# google-api-core # google-api-core
# google-cloud-vision # google-cloud-vision
google-cloud-vision==3.10.0 google-cloud-vision==2.7.2
# via -r extra-pdf-image.in # via -r ./extra-pdf-image.in
googleapis-common-protos==1.67.0 googleapis-common-protos==1.56.1
# via # via
# google-api-core # google-api-core
# grpcio-status # grpcio-status
grpcio==1.70.0 grpcio==1.70.0
# via # via
# -c ./deps/constraints.txt # -c ././deps/constraints.txt
# google-api-core # google-api-core
# grpcio-status # grpcio-status
grpcio-status==1.70.0 grpcio-status==1.62.3
# via google-api-core # via google-api-core
huggingface-hub==0.28.1 huggingface-hub==0.29.2
# via # via
# timm # timm
# tokenizers # tokenizers
@ -77,17 +77,17 @@ humanfriendly==10.0
# via coloredlogs # via coloredlogs
idna==3.10 idna==3.10
# via # via
# -c base.txt # -c ./base.txt
# requests # requests
importlib-resources==6.5.2 importlib-resources==6.5.2
# via matplotlib # via matplotlib
jinja2==3.1.5 jinja2==3.1.6
# via torch # via torch
kiwisolver==1.4.7 kiwisolver==1.4.7
# via matplotlib # via matplotlib
lxml==5.3.1 lxml==5.3.1
# via # via
# -c base.txt # -c ./base.txt
# pikepdf # pikepdf
markupsafe==3.0.2 markupsafe==3.0.2
# via jinja2 # via jinja2
@ -101,7 +101,7 @@ networkx==3.2.1
# via torch # via torch
numpy==1.26.4 numpy==1.26.4
# via # via
# -c base.txt # -c ./base.txt
# contourpy # contourpy
# matplotlib # matplotlib
# onnx # onnx
@ -117,7 +117,7 @@ omegaconf==2.3.0
# via effdet # via effdet
onnx==1.17.0 onnx==1.17.0
# via # via
# -r extra-pdf-image.in # -r ./extra-pdf-image.in
# unstructured-inference # unstructured-inference
onnxruntime==1.19.2 onnxruntime==1.19.2
# via unstructured-inference # via unstructured-inference
@ -125,7 +125,7 @@ opencv-python==4.11.0.86
# via unstructured-inference # via unstructured-inference
packaging==24.2 packaging==24.2
# via # via
# -c base.txt # -c ./base.txt
# huggingface-hub # huggingface-hub
# matplotlib # matplotlib
# onnxruntime # onnxruntime
@ -135,15 +135,15 @@ packaging==24.2
pandas==2.2.3 pandas==2.2.3
# via unstructured-inference # via unstructured-inference
pdf2image==1.17.0 pdf2image==1.17.0
# via -r extra-pdf-image.in # via -r ./extra-pdf-image.in
pdfminer-six==20240706 pdfminer-six==20240706
# via # via
# -r extra-pdf-image.in # -r ./extra-pdf-image.in
# unstructured-inference # unstructured-inference
pi-heif==0.21.0 pi-heif==0.21.0
# via -r extra-pdf-image.in # via -r ./extra-pdf-image.in
pikepdf==9.5.2 pikepdf==9.5.2
# via -r extra-pdf-image.in # via -r ./extra-pdf-image.in
pillow==11.1.0 pillow==11.1.0
# via # via
# matplotlib # matplotlib
@ -152,14 +152,12 @@ pillow==11.1.0
# pikepdf # pikepdf
# torchvision # torchvision
# unstructured-pytesseract # unstructured-pytesseract
proto-plus==1.26.0 proto-plus==1.20.4
# via google-cloud-vision
protobuf==6.30.0
# via # via
# -c ././deps/constraints.txt
# google-api-core # google-api-core
# google-cloud-vision
protobuf==5.29.3
# via
# google-api-core
# google-cloud-vision
# googleapis-common-protos # googleapis-common-protos
# grpcio-status # grpcio-status
# onnx # onnx
@ -175,19 +173,19 @@ pycocotools==2.0.8
# via effdet # via effdet
pycparser==2.22 pycparser==2.22
# via # via
# -c base.txt # -c ./base.txt
# cffi # cffi
pyparsing==3.2.1 pyparsing==3.2.1
# via matplotlib # via matplotlib
pypdf==5.3.0 pypdf==5.3.1
# via # via
# -c base.txt # -c ./base.txt
# -r extra-pdf-image.in # -r ./extra-pdf-image.in
pypdfium2==4.30.1 pypdfium2==4.30.1
# via unstructured-inference # via unstructured-inference
python-dateutil==2.9.0.post0 python-dateutil==2.9.0.post0
# via # via
# -c base.txt # -c ./base.txt
# matplotlib # matplotlib
# pandas # pandas
python-multipart==0.0.20 python-multipart==0.0.20
@ -200,23 +198,23 @@ pyyaml==6.0.2
# omegaconf # omegaconf
# timm # timm
# transformers # transformers
rapidfuzz==3.12.1 rapidfuzz==3.12.2
# via # via
# -c base.txt # -c ./base.txt
# unstructured-inference # unstructured-inference
regex==2024.11.6 regex==2024.11.6
# via # via
# -c base.txt # -c ./base.txt
# transformers # transformers
requests==2.32.3 requests==2.32.3
# via # via
# -c base.txt # -c ./base.txt
# google-api-core # google-api-core
# huggingface-hub # huggingface-hub
# transformers # transformers
rsa==4.9 rsa==4.9
# via google-auth # via google-auth
safetensors==0.5.2 safetensors==0.5.3
# via # via
# timm # timm
# transformers # transformers
@ -224,19 +222,19 @@ scipy==1.13.1
# via unstructured-inference # via unstructured-inference
six==1.17.0 six==1.17.0
# via # via
# -c base.txt # -c ./base.txt
# python-dateutil # python-dateutil
sympy==1.13.1 sympy==1.13.1
# via # via
# onnxruntime # onnxruntime
# torch # torch
timm==1.0.14 timm==1.0.15
# via # via
# effdet # effdet
# unstructured-inference # unstructured-inference
tokenizers==0.21.0 tokenizers==0.21.0
# via # via
# -c ./deps/constraints.txt # -c ././deps/constraints.txt
# transformers # transformers
torch==2.6.0 torch==2.6.0
# via # via
@ -250,31 +248,31 @@ torchvision==0.21.0
# timm # timm
tqdm==4.67.1 tqdm==4.67.1
# via # via
# -c base.txt # -c ./base.txt
# huggingface-hub # huggingface-hub
# transformers # transformers
transformers==4.49.0 transformers==4.49.0
# via unstructured-inference # via unstructured-inference
typing-extensions==4.12.2 typing-extensions==4.12.2
# via # via
# -c base.txt # -c ./base.txt
# huggingface-hub # huggingface-hub
# pypdf # pypdf
# torch # torch
tzdata==2025.1 tzdata==2025.1
# via pandas # via pandas
unstructured-inference==0.8.7 unstructured-inference==0.8.9
# via -r extra-pdf-image.in # via -r ./extra-pdf-image.in
unstructured-pytesseract==0.3.13 unstructured-pytesseract==0.3.15
# via -r extra-pdf-image.in # via -r ./extra-pdf-image.in
urllib3==1.26.20 urllib3==1.26.20
# via # via
# -c ./deps/constraints.txt # -c ././deps/constraints.txt
# -c base.txt # -c ./base.txt
# requests # requests
wrapt==1.17.2 wrapt==1.17.2
# via # via
# -c base.txt # -c ./base.txt
# deprecated # deprecated
zipp==3.21.0 zipp==3.21.0
# via importlib-resources # via importlib-resources

View File

@ -2,14 +2,14 @@
# This file is autogenerated by pip-compile with Python 3.9 # This file is autogenerated by pip-compile with Python 3.9
# by the following command: # by the following command:
# #
# pip-compile extra-pptx.in # pip-compile ./extra-pptx.in
# #
lxml==5.3.1 lxml==5.3.1
# via python-pptx # via python-pptx
pillow==11.1.0 pillow==11.1.0
# via python-pptx # via python-pptx
python-pptx==1.0.2 python-pptx==1.0.2
# via -r extra-pptx.in # via -r ./extra-pptx.in
typing-extensions==4.12.2 typing-extensions==4.12.2
# via python-pptx # via python-pptx
xlsxwriter==3.2.2 xlsxwriter==3.2.2

View File

@ -2,31 +2,31 @@
# This file is autogenerated by pip-compile with Python 3.9 # This file is autogenerated by pip-compile with Python 3.9
# by the following command: # by the following command:
# #
# pip-compile extra-xlsx.in # pip-compile ./extra-xlsx.in
# #
et-xmlfile==2.0.0 et-xmlfile==2.0.0
# via openpyxl # via openpyxl
networkx==3.2.1 networkx==3.2.1
# via -r extra-xlsx.in # via -r ./extra-xlsx.in
numpy==1.26.4 numpy==1.26.4
# via # via
# -c base.txt # -c ./base.txt
# pandas # pandas
openpyxl==3.1.5 openpyxl==3.1.5
# via -r extra-xlsx.in # via -r ./extra-xlsx.in
pandas==2.2.3 pandas==2.2.3
# via -r extra-xlsx.in # via -r ./extra-xlsx.in
python-dateutil==2.9.0.post0 python-dateutil==2.9.0.post0
# via # via
# -c base.txt # -c ./base.txt
# pandas # pandas
pytz==2025.1 pytz==2025.1
# via pandas # via pandas
six==1.17.0 six==1.17.0
# via # via
# -c base.txt # -c ./base.txt
# python-dateutil # python-dateutil
tzdata==2025.1 tzdata==2025.1
# via pandas # via pandas
xlrd==2.0.1 xlrd==2.0.1
# via -r extra-xlsx.in # via -r ./extra-xlsx.in

View File

@ -2,19 +2,19 @@
# This file is autogenerated by pip-compile with Python 3.9 # This file is autogenerated by pip-compile with Python 3.9
# by the following command: # by the following command:
# #
# pip-compile huggingface.in # pip-compile ./huggingface.in
# #
certifi==2025.1.31 certifi==2025.1.31
# via # via
# -c base.txt # -c ./base.txt
# requests # requests
charset-normalizer==3.4.1 charset-normalizer==3.4.1
# via # via
# -c base.txt # -c ./base.txt
# requests # requests
click==8.1.8 click==8.1.8
# via # via
# -c base.txt # -c ./base.txt
# sacremoses # sacremoses
filelock==3.17.0 filelock==3.17.0
# via # via
@ -25,24 +25,24 @@ fsspec==2025.2.0
# via # via
# huggingface-hub # huggingface-hub
# torch # torch
huggingface-hub==0.28.1 huggingface-hub==0.29.2
# via # via
# tokenizers # tokenizers
# transformers # transformers
idna==3.10 idna==3.10
# via # via
# -c base.txt # -c ./base.txt
# requests # requests
jinja2==3.1.5 jinja2==3.1.6
# via torch # via torch
joblib==1.4.2 joblib==1.4.2
# via # via
# -c base.txt # -c ./base.txt
# sacremoses # sacremoses
langdetect==1.0.9 langdetect==1.0.9
# via # via
# -c base.txt # -c ./base.txt
# -r huggingface.in # -r ./huggingface.in
markupsafe==3.0.2 markupsafe==3.0.2
# via jinja2 # via jinja2
mpmath==1.3.0 mpmath==1.3.0
@ -51,11 +51,11 @@ networkx==3.2.1
# via torch # via torch
numpy==1.26.4 numpy==1.26.4
# via # via
# -c base.txt # -c ./base.txt
# transformers # transformers
packaging==24.2 packaging==24.2
# via # via
# -c base.txt # -c ./base.txt
# huggingface-hub # huggingface-hub
# transformers # transformers
pyyaml==6.0.2 pyyaml==6.0.2
@ -64,47 +64,47 @@ pyyaml==6.0.2
# transformers # transformers
regex==2024.11.6 regex==2024.11.6
# via # via
# -c base.txt # -c ./base.txt
# sacremoses # sacremoses
# transformers # transformers
requests==2.32.3 requests==2.32.3
# via # via
# -c base.txt # -c ./base.txt
# huggingface-hub # huggingface-hub
# transformers # transformers
sacremoses==0.1.1 sacremoses==0.1.1
# via -r huggingface.in # via -r ./huggingface.in
safetensors==0.5.2 safetensors==0.5.3
# via transformers # via transformers
sentencepiece==0.2.0 sentencepiece==0.2.0
# via -r huggingface.in # via -r ./huggingface.in
six==1.17.0 six==1.17.0
# via # via
# -c base.txt # -c ./base.txt
# langdetect # langdetect
sympy==1.13.1 sympy==1.13.1
# via torch # via torch
tokenizers==0.21.0 tokenizers==0.21.0
# via # via
# -c ./deps/constraints.txt # -c ././deps/constraints.txt
# transformers # transformers
torch==2.6.0 torch==2.6.0
# via -r huggingface.in # via -r ./huggingface.in
tqdm==4.67.1 tqdm==4.67.1
# via # via
# -c base.txt # -c ./base.txt
# huggingface-hub # huggingface-hub
# sacremoses # sacremoses
# transformers # transformers
transformers==4.49.0 transformers==4.49.0
# via -r huggingface.in # via -r ./huggingface.in
typing-extensions==4.12.2 typing-extensions==4.12.2
# via # via
# -c base.txt # -c ./base.txt
# huggingface-hub # huggingface-hub
# torch # torch
urllib3==1.26.20 urllib3==1.26.20
# via # via
# -c ./deps/constraints.txt # -c ././deps/constraints.txt
# -c base.txt # -c ./base.txt
# requests # requests

View File

@ -2,46 +2,46 @@
# This file is autogenerated by pip-compile with Python 3.9 # This file is autogenerated by pip-compile with Python 3.9
# by the following command: # by the following command:
# #
# pip-compile test.in # pip-compile ./test.in
# #
annotated-types==0.7.0 annotated-types==0.7.0
# via pydantic # via pydantic
anyio==4.8.0 anyio==4.8.0
# via # via
# -c base.txt # -c ./base.txt
# httpx # httpx
appdirs==1.4.4 appdirs==1.4.4
# via label-studio-sdk # via label-studio-sdk
argcomplete==3.5.3 argcomplete==3.6.0
# via datamodel-code-generator # via datamodel-code-generator
attrs==25.1.0 attrs==25.1.0
# via # via
# jsonschema # jsonschema
# referencing # referencing
autoflake==2.3.1 autoflake==2.3.1
# via -r test.in # via -r ./test.in
black==25.1.0 black==25.1.0
# via # via
# -r test.in # -r ./test.in
# datamodel-code-generator # datamodel-code-generator
certifi==2025.1.31 certifi==2025.1.31
# via # via
# -c base.txt # -c ./base.txt
# httpcore # httpcore
# httpx # httpx
# requests # requests
charset-normalizer==3.4.1 charset-normalizer==3.4.1
# via # via
# -c base.txt # -c ./base.txt
# requests # requests
click==8.1.8 click==8.1.8
# via # via
# -c base.txt # -c ./base.txt
# black # black
# nltk # nltk
coverage[toml]==7.6.12 coverage[toml]==7.6.12
# via # via
# -r test.in # -r ./test.in
# pytest-cov # pytest-cov
datamodel-code-generator==0.26.1 datamodel-code-generator==0.26.1
# via label-studio-sdk # via label-studio-sdk
@ -51,40 +51,40 @@ email-validator==2.2.0
# via pydantic # via pydantic
exceptiongroup==1.2.2 exceptiongroup==1.2.2
# via # via
# -c base.txt # -c ./base.txt
# anyio # anyio
# pytest # pytest
faker==36.1.1 faker==36.2.2
# via jsf # via jsf
flake8==7.1.2 flake8==7.1.2
# via # via
# -r test.in # -r ./test.in
# flake8-print # flake8-print
flake8-print==5.0.0 flake8-print==5.0.0
# via -r test.in # via -r ./test.in
freezegun==1.5.1 freezegun==1.5.1
# via -r test.in # via -r ./test.in
genson==1.3.0 genson==1.3.0
# via datamodel-code-generator # via datamodel-code-generator
grpcio==1.70.0 grpcio==1.70.0
# via # via
# -c ./deps/constraints.txt # -c ././deps/constraints.txt
# -r test.in # -r ./test.in
h11==0.14.0 h11==0.14.0
# via # via
# -c base.txt # -c ./base.txt
# httpcore # httpcore
httpcore==1.0.7 httpcore==1.0.7
# via # via
# -c base.txt # -c ./base.txt
# httpx # httpx
httpx==0.28.1 httpx==0.28.1
# via # via
# -c base.txt # -c ./base.txt
# label-studio-sdk # label-studio-sdk
idna==3.10 idna==3.10
# via # via
# -c base.txt # -c ./base.txt
# anyio # anyio
# email-validator # email-validator
# httpx # httpx
@ -98,11 +98,11 @@ iniconfig==2.0.0
# via pytest # via pytest
isort==5.13.2 isort==5.13.2
# via datamodel-code-generator # via datamodel-code-generator
jinja2==3.1.5 jinja2==3.1.6
# via datamodel-code-generator # via datamodel-code-generator
joblib==1.4.2 joblib==1.4.2
# via # via
# -c base.txt # -c ./base.txt
# nltk # nltk
jsf==0.11.2 jsf==0.11.2
# via label-studio-sdk # via label-studio-sdk
@ -113,12 +113,12 @@ jsonschema==4.23.0
jsonschema-specifications==2024.10.1 jsonschema-specifications==2024.10.1
# via jsonschema # via jsonschema
label-studio-sdk==1.0.10 label-studio-sdk==1.0.10
# via -r test.in # via -r ./test.in
liccheck==0.9.2 liccheck==0.9.2
# via -r test.in # via -r ./test.in
lxml==5.3.1 lxml==5.3.1
# via # via
# -c base.txt # -c ./base.txt
# label-studio-sdk # label-studio-sdk
markupsafe==3.0.2 markupsafe==3.0.2
# via jinja2 # via jinja2
@ -127,24 +127,24 @@ mccabe==0.7.0
multidict==6.1.0 multidict==6.1.0
# via yarl # via yarl
mypy==1.15.0 mypy==1.15.0
# via -r test.in # via -r ./test.in
mypy-extensions==1.0.0 mypy-extensions==1.0.0
# via # via
# -c base.txt # -c ./base.txt
# black # black
# mypy # mypy
nltk==3.9.1 nltk==3.9.1
# via # via
# -c base.txt # -c ./base.txt
# label-studio-sdk # label-studio-sdk
numpy==1.26.4 numpy==1.26.4
# via # via
# -c base.txt # -c ./base.txt
# label-studio-sdk # label-studio-sdk
# pandas # pandas
packaging==24.2 packaging==24.2
# via # via
# -c base.txt # -c ./base.txt
# black # black
# datamodel-code-generator # datamodel-code-generator
# pytest # pytest
@ -158,7 +158,7 @@ platformdirs==4.3.6
# via black # via black
pluggy==1.5.0 pluggy==1.5.0
# via pytest # via pytest
propcache==0.2.1 propcache==0.3.0
# via yarl # via yarl
pycodestyle==2.12.1 pycodestyle==2.12.1
# via # via
@ -166,7 +166,7 @@ pycodestyle==2.12.1
# flake8-print # flake8-print
pydantic[email]==2.10.6 pydantic[email]==2.10.6
# via # via
# -r test.in # -r ./test.in
# datamodel-code-generator # datamodel-code-generator
# jsf # jsf
# label-studio-sdk # label-studio-sdk
@ -178,17 +178,17 @@ pyflakes==3.2.0
# via # via
# autoflake # autoflake
# flake8 # flake8
pytest==8.3.4 pytest==8.3.5
# via # via
# pytest-cov # pytest-cov
# pytest-mock # pytest-mock
pytest-cov==6.0.0 pytest-cov==6.0.0
# via -r test.in # via -r ./test.in
pytest-mock==3.14.0 pytest-mock==3.14.0
# via -r test.in # via -r ./test.in
python-dateutil==2.9.0.post0 python-dateutil==2.9.0.post0
# via # via
# -c base.txt # -c ./base.txt
# freezegun # freezegun
# pandas # pandas
pytz==2025.1 pytz==2025.1
@ -203,35 +203,35 @@ referencing==0.36.2
# jsonschema-specifications # jsonschema-specifications
regex==2024.11.6 regex==2024.11.6
# via # via
# -c base.txt # -c ./base.txt
# nltk # nltk
requests==2.32.3 requests==2.32.3
# via # via
# -c base.txt # -c ./base.txt
# label-studio-sdk # label-studio-sdk
# requests-mock # requests-mock
# smart-open # smart-open
requests-mock==1.12.1 requests-mock==1.12.1
# via label-studio-sdk # via label-studio-sdk
rpds-py==0.22.3 rpds-py==0.23.1
# via # via
# jsonschema # jsonschema
# referencing # referencing
rstr==3.2.2 rstr==3.2.2
# via jsf # via jsf
ruff==0.9.6 ruff==0.9.9
# via -r test.in # via -r ./test.in
semantic-version==2.10.0 semantic-version==2.10.0
# via liccheck # via liccheck
six==1.17.0 six==1.17.0
# via # via
# -c base.txt # -c ./base.txt
# python-dateutil # python-dateutil
smart-open[http]==7.1.0 smart-open[http]==7.1.0
# via jsf # via jsf
sniffio==1.3.1 sniffio==1.3.1
# via # via
# -c base.txt # -c ./base.txt
# anyio # anyio
toml==0.10.2 toml==0.10.2
# via # via
@ -246,21 +246,21 @@ tomli==2.2.1
# pytest # pytest
tqdm==4.67.1 tqdm==4.67.1
# via # via
# -c base.txt # -c ./base.txt
# nltk # nltk
types-click==7.1.8 types-click==7.1.8
# via -r test.in # via -r ./test.in
types-markdown==3.7.0.20241204 types-markdown==3.7.0.20241204
# via -r test.in # via -r ./test.in
types-requests==2.31.0.6 types-requests==2.31.0.6
# via -r test.in # via -r ./test.in
types-tabulate==0.9.0.20241207 types-tabulate==0.9.0.20241207
# via -r test.in # via -r ./test.in
types-urllib3==1.26.25.14 types-urllib3==1.26.25.14
# via types-requests # via types-requests
typing-extensions==4.12.2 typing-extensions==4.12.2
# via # via
# -c base.txt # -c ./base.txt
# anyio # anyio
# black # black
# jsf # jsf
@ -278,15 +278,15 @@ ujson==5.10.0
# via label-studio-sdk # via label-studio-sdk
urllib3==1.26.20 urllib3==1.26.20
# via # via
# -c ./deps/constraints.txt # -c ././deps/constraints.txt
# -c base.txt # -c ./base.txt
# requests # requests
# vcrpy # vcrpy
vcrpy==7.0.0 vcrpy==7.0.0
# via -r test.in # via -r ./test.in
wrapt==1.17.2 wrapt==1.17.2
# via # via
# -c base.txt # -c ./base.txt
# smart-open # smart-open
# vcrpy # vcrpy
xmljson==0.2.1 xmljson==0.2.1

View File

@ -5,6 +5,7 @@
from __future__ import annotations from __future__ import annotations
import io import io
import json
import os import os
import pytest import pytest
@ -944,3 +945,45 @@ class Describe_ZipFileDetector:
): ):
ctx = _FileTypeDetectionContext(example_doc_path(file_name)) ctx = _FileTypeDetectionContext(example_doc_path(file_name))
assert _ZipFileDetector.file_type(ctx) is expected_value assert _ZipFileDetector.file_type(ctx) is expected_value
def test_mimetype_magic_detection_is_used_before_filename_when_filetype_is_detected_for_json():
    """JSON bytes are reported as `FileType.JSON` even when the asserted file name ends in `.pdf`.

    The same in-memory stream is checked twice: once with the misleading name passed as
    `metadata_file_path` and once with it set as the stream's `name` attribute.
    """
    payload = json.dumps([{"example": "data"}]).encode("utf-8")
    stream = io.BytesIO(payload)

    # -- misleading file name supplied by the caller as `metadata_file_path` --
    assert detect_filetype(file=stream, metadata_file_path="filename.pdf") == FileType.JSON

    # -- misleading file name carried by the stream itself --
    stream.name = "filename.pdf"
    assert detect_filetype(file=stream) == FileType.JSON
def test_mimetype_magic_detection_is_used_before_filename_when_filetype_is_detected_for_ndjson():
    """NDJSON bytes are reported as `FileType.NDJSON` even when the file name ends in `.pdf`.

    The same in-memory stream is checked twice: once with the misleading name passed as
    `metadata_file_path` and once with it set as the stream's `name` attribute.
    """
    records = [{"example": "data1"}, {"example": "data2"}, {"example": "data3"}]
    # -- one JSON document per line, each line newline-terminated --
    ndjson_text = "".join(json.dumps(record) + "\n" for record in records)
    stream = io.BytesIO(ndjson_text.encode("utf-8"))

    # -- misleading file name supplied by the caller as `metadata_file_path` --
    assert detect_filetype(file=stream, metadata_file_path="filename.pdf") == FileType.NDJSON

    # -- misleading file name carried by the stream itself --
    stream.name = "filename.pdf"
    assert detect_filetype(file=stream) == FileType.NDJSON
def test_json_content_type_is_disambiguated_for_ndjson():
    """An asserted `application/json` content-type still yields `FileType.NDJSON` for NDJSON bytes.

    The same in-memory stream is checked twice, each time with a misleading `.pdf` file name:
    once passed as `metadata_file_path` and once set as the stream's `name` attribute.
    """
    records = [{"example": "data1"}, {"example": "data2"}, {"example": "data3"}]
    # -- one JSON document per line, each line newline-terminated --
    ndjson_text = "".join(json.dumps(record) + "\n" for record in records)
    stream = io.BytesIO(ndjson_text.encode("utf-8"))

    # -- misleading file name supplied by the caller as `metadata_file_path` --
    detected = detect_filetype(
        file=stream, metadata_file_path="filename.pdf", content_type="application/json"
    )
    assert detected == FileType.NDJSON

    # -- misleading file name carried by the stream itself --
    stream.name = "filename.pdf"
    detected = detect_filetype(file=stream, content_type="application/json")
    assert detected == FileType.NDJSON

View File

@ -409,17 +409,17 @@ def test_auto_partition_json_from_file_preserves_original_elements():
assert elements_to_dicts(partitioned_elements) == elements_to_dicts(original_elements) assert elements_to_dicts(partitioned_elements) == elements_to_dicts(original_elements)
def test_auto_partition_json_raises_with_unprocessable_json(tmp_path: pathlib.Path): def test_auto_partition_processes_simple_ndjson(tmp_path: pathlib.Path):
# NOTE(robinson) - This is unprocessable because it is not a list of dicts, per the text = '{"text": "hello", "type": "NarrativeText"}'
# Unstructured JSON serialization format
text = '{"hi": "there"}'
file_path = str(tmp_path / "unprocessable.json") file_path = str(tmp_path / "unprocessable.json")
with open(file_path, "w") as f: with open(file_path, "w") as f:
f.write(text) f.write(text)
with pytest.raises(ValueError, match="Detected a JSON file that does not conform to the Unst"): result = partition(filename=file_path)
partition(filename=file_path) assert len(result) == 1
assert isinstance(result[0], NarrativeText)
assert "hello" in result[0].text
# ================================================================================================ # ================================================================================================

View File

@ -1 +1 @@
__version__ = "0.16.24" # pragma: no cover __version__ = "0.16.25" # pragma: no cover

View File

@ -169,30 +169,31 @@ class _FileTypeDetector:
# -- accuracy. So start with binary types and only then consider an asserted content-type, # -- accuracy. So start with binary types and only then consider an asserted content-type,
# -- generally as a last resort. # -- generally as a last resort.
# -- strategy 1: most binary types can be detected with 100% accuracy -- if (
if file_type := self._known_binary_file_type: ( # strategy 1: most binary types can be detected with 100% accuracy
return file_type predicted_file_type := self._known_binary_file_type
)
or ( # strategy 2: use content-type asserted by caller
predicted_file_type := self._file_type_from_content_type
)
or ( # strategy 3: guess MIME-type using libmagic and use that
predicted_file_type := self._file_type_from_guessed_mime_type
)
or ( # strategy 4: use filename-extension, like ".docx" -> FileType.DOCX
predicted_file_type := self._file_type_from_file_extension
)
):
result_file_type = predicted_file_type
else:
# give up and report FileType.UNK
result_file_type = FileType.UNK
# -- strategy 2: use content-type asserted by caller -- if result_file_type == FileType.JSON:
if file_type := self._file_type_from_content_type: # edge case where JSON/NDJSON content without file extension
return file_type # (magic lib can't distinguish them)
result_file_type = self._disambiguate_json_file_type
# -- strategy 3: guess MIME-type using libmagic and use that -- return result_file_type
if file_type := self._file_type_from_guessed_mime_type:
return file_type
# -- strategy 4: use filename-extension, like ".docx" -> FileType.DOCX --
if file_type := self._file_type_from_file_extension:
return file_type
# -- strategy 5: edge case where JSON/NDJSON content without file extension --
if file_type := self._disambiguate_json_file_type:
return file_type
# -- strategy 6: give up and report FileType.UNK --
return FileType.UNK
# == STRATEGIES ============================================================
@property @property
def _known_binary_file_type(self) -> FileType | None: def _known_binary_file_type(self) -> FileType | None:

View File

@ -124,8 +124,7 @@ class FileType(enum.Enum):
Returns `None` when `mime_type` is `None` or does not map to the canonical MIME-type of a Returns `None` when `mime_type` is `None` or does not map to the canonical MIME-type of a
`FileType` member or one of its alias MIME-types. `FileType` member or one of its alias MIME-types.
""" """
if mime_type is None or mime_type == "application/json": if mime_type is None:
# application/json is ambiguous as it may point to JSON and NDJSON file types
return None return None
# -- not super efficient but plenty fast enough for once-or-twice-per-file use and avoids # -- not super efficient but plenty fast enough for once-or-twice-per-file use and avoids
# -- limitations on defining a class variable on an Enum. # -- limitations on defining a class variable on an Enum.