mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-07-03 07:05:20 +00:00
feat: add .pre-commit-config.yaml
to let users enable pre-commit
hooks (#320)
Per the README, provides an optional `pre-commit` configuration file to ensure code matches the formatting and linting standards used in `unstructured`.
This commit is contained in:
parent
f5af87a540
commit
2979e17aa4
37
.pre-commit-config.yaml
Normal file
37
.pre-commit-config.yaml
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
repos:
|
||||||
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||||
|
rev: "v4.3.0"
|
||||||
|
hooks:
|
||||||
|
- id: check-added-large-files
|
||||||
|
- id: check-toml
|
||||||
|
- id: check-yaml
|
||||||
|
- id: check-json
|
||||||
|
- id: check-xml
|
||||||
|
- id: end-of-file-fixer
|
||||||
|
files: \.py$
|
||||||
|
- id: trailing-whitespace
|
||||||
|
- id: mixed-line-ending
|
||||||
|
|
||||||
|
- repo: https://github.com/psf/black
|
||||||
|
rev: 22.10.0
|
||||||
|
hooks:
|
||||||
|
- id: black
|
||||||
|
args: ["--line-length=100"]
|
||||||
|
language_version: python3
|
||||||
|
|
||||||
|
- repo: https://github.com/charliermarsh/ruff-pre-commit
|
||||||
|
rev: "v0.0.230"
|
||||||
|
hooks:
|
||||||
|
- id: ruff
|
||||||
|
args:
|
||||||
|
[
|
||||||
|
"--fix",
|
||||||
|
"--select=I,UP015,UP032,UP034,UP018,COM,C4,PT,SIM,PLR0402",
|
||||||
|
"--ignore=PT011,PT012,SIM117",
|
||||||
|
]
|
||||||
|
|
||||||
|
- repo: https://github.com/pycqa/flake8
|
||||||
|
rev: 4.0.1
|
||||||
|
hooks:
|
||||||
|
- id: flake8
|
||||||
|
language_version: python3
|
@ -121,6 +121,15 @@ locally if you are planning to contribute to the project.
|
|||||||
* For processing image files, `tesseract` is required. See [here](https://tesseract-ocr.github.io/tessdoc/Installation.html) for installation instructions.
|
* For processing image files, `tesseract` is required. See [here](https://tesseract-ocr.github.io/tessdoc/Installation.html) for installation instructions.
|
||||||
* For processing PDF files, `tesseract` and `poppler` are required. The [pdf2image docs](https://pdf2image.readthedocs.io/en/latest/installation.html) have instructions on installing `poppler` across various platforms.
|
* For processing PDF files, `tesseract` and `poppler` are required. The [pdf2image docs](https://pdf2image.readthedocs.io/en/latest/installation.html) have instructions on installing `poppler` across various platforms.
|
||||||
|
|
||||||
|
Additionally, if you're planning to contribute to `unstructured`, we provide an optional `pre-commit` configuration
|
||||||
|
file to ensure your code matches the formatting and linting standards used in `unstructured`.
|
||||||
|
If you'd prefer not to have code changes auto-tidied before every commit, you can use `make check` to see
|
||||||
|
whether any linting or formatting changes should be applied, and `make tidy` to apply them.
|
||||||
|
|
||||||
|
If you use the optional `pre-commit` configuration, you only need to install the hooks with `pre-commit install`, since the
|
||||||
|
`pre-commit` package is installed as part of `make install` mentioned above. Finally, if you decide to use `pre-commit`,
|
||||||
|
you can also uninstall the hooks with `pre-commit uninstall`.
|
||||||
|
|
||||||
## :clap: Quick Tour
|
## :clap: Quick Tour
|
||||||
|
|
||||||
You can run this [Colab notebook](https://colab.research.google.com/drive/1U8VCjY2-x8c6y5TYMbSFtQGlQVFHCVIW) to run the examples below.
|
You can run this [Colab notebook](https://colab.research.google.com/drive/1U8VCjY2-x8c6y5TYMbSFtQGlQVFHCVIW) to run the examples below.
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
#
|
#
|
||||||
alabaster==0.7.13
|
alabaster==0.7.13
|
||||||
# via sphinx
|
# via sphinx
|
||||||
babel==2.11.0
|
babel==2.12.1
|
||||||
# via sphinx
|
# via sphinx
|
||||||
beautifulsoup4==4.11.2
|
beautifulsoup4==4.11.2
|
||||||
# via furo
|
# via furo
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
#
|
#
|
||||||
alabaster==0.7.13
|
alabaster==0.7.13
|
||||||
# via sphinx
|
# via sphinx
|
||||||
babel==2.11.0
|
babel==2.12.1
|
||||||
# via sphinx
|
# via sphinx
|
||||||
beautifulsoup4==4.11.2
|
beautifulsoup4==4.11.2
|
||||||
# via furo
|
# via furo
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
jupyter
|
jupyter
|
||||||
ipython
|
ipython
|
||||||
pip-tools
|
pip-tools
|
||||||
|
pre-commit
|
||||||
# NOTE(robinson) - Required pins for security scans
|
# NOTE(robinson) - Required pins for security scans
|
||||||
jupyter-core>=4.11.2
|
jupyter-core>=4.11.2
|
||||||
wheel>=0.38.1
|
wheel>=0.38.1
|
||||||
|
@ -33,6 +33,8 @@ build==0.10.0
|
|||||||
# via pip-tools
|
# via pip-tools
|
||||||
cffi==1.15.1
|
cffi==1.15.1
|
||||||
# via argon2-cffi-bindings
|
# via argon2-cffi-bindings
|
||||||
|
cfgv==3.3.1
|
||||||
|
# via pre-commit
|
||||||
click==8.1.3
|
click==8.1.3
|
||||||
# via pip-tools
|
# via pip-tools
|
||||||
comm==0.1.2
|
comm==0.1.2
|
||||||
@ -43,12 +45,18 @@ decorator==5.1.1
|
|||||||
# via ipython
|
# via ipython
|
||||||
defusedxml==0.7.1
|
defusedxml==0.7.1
|
||||||
# via nbconvert
|
# via nbconvert
|
||||||
|
distlib==0.3.6
|
||||||
|
# via virtualenv
|
||||||
executing==1.2.0
|
executing==1.2.0
|
||||||
# via stack-data
|
# via stack-data
|
||||||
fastjsonschema==2.16.3
|
fastjsonschema==2.16.3
|
||||||
# via nbformat
|
# via nbformat
|
||||||
|
filelock==3.9.0
|
||||||
|
# via virtualenv
|
||||||
fqdn==1.5.1
|
fqdn==1.5.1
|
||||||
# via jsonschema
|
# via jsonschema
|
||||||
|
identify==2.5.18
|
||||||
|
# via pre-commit
|
||||||
idna==3.4
|
idna==3.4
|
||||||
# via
|
# via
|
||||||
# anyio
|
# anyio
|
||||||
@ -67,7 +75,7 @@ ipykernel==6.21.2
|
|||||||
# nbclassic
|
# nbclassic
|
||||||
# notebook
|
# notebook
|
||||||
# qtconsole
|
# qtconsole
|
||||||
ipython==8.10.0
|
ipython==8.11.0
|
||||||
# via
|
# via
|
||||||
# -r requirements/dev.in
|
# -r requirements/dev.in
|
||||||
# ipykernel
|
# ipykernel
|
||||||
@ -166,6 +174,8 @@ nest-asyncio==1.5.6
|
|||||||
# ipykernel
|
# ipykernel
|
||||||
# nbclassic
|
# nbclassic
|
||||||
# notebook
|
# notebook
|
||||||
|
nodeenv==1.7.0
|
||||||
|
# via pre-commit
|
||||||
notebook==6.5.2
|
notebook==6.5.2
|
||||||
# via jupyter
|
# via jupyter
|
||||||
notebook-shim==0.2.2
|
notebook-shim==0.2.2
|
||||||
@ -185,18 +195,22 @@ pexpect==4.8.0
|
|||||||
# via ipython
|
# via ipython
|
||||||
pickleshare==0.7.5
|
pickleshare==0.7.5
|
||||||
# via ipython
|
# via ipython
|
||||||
pip-tools==6.12.2
|
pip-tools==6.12.3
|
||||||
# via -r requirements/dev.in
|
# via -r requirements/dev.in
|
||||||
pkgutil-resolve-name==1.3.10
|
pkgutil-resolve-name==1.3.10
|
||||||
# via jsonschema
|
# via jsonschema
|
||||||
platformdirs==3.0.0
|
platformdirs==3.0.0
|
||||||
# via jupyter-core
|
# via
|
||||||
|
# jupyter-core
|
||||||
|
# virtualenv
|
||||||
|
pre-commit==3.1.1
|
||||||
|
# via -r requirements/dev.in
|
||||||
prometheus-client==0.16.0
|
prometheus-client==0.16.0
|
||||||
# via
|
# via
|
||||||
# jupyter-server
|
# jupyter-server
|
||||||
# nbclassic
|
# nbclassic
|
||||||
# notebook
|
# notebook
|
||||||
prompt-toolkit==3.0.37
|
prompt-toolkit==3.0.38
|
||||||
# via
|
# via
|
||||||
# ipython
|
# ipython
|
||||||
# jupyter-console
|
# jupyter-console
|
||||||
@ -227,7 +241,9 @@ python-dateutil==2.8.2
|
|||||||
python-json-logger==2.0.7
|
python-json-logger==2.0.7
|
||||||
# via jupyter-events
|
# via jupyter-events
|
||||||
pyyaml==6.0
|
pyyaml==6.0
|
||||||
# via jupyter-events
|
# via
|
||||||
|
# jupyter-events
|
||||||
|
# pre-commit
|
||||||
pyzmq==25.0.0
|
pyzmq==25.0.0
|
||||||
# via
|
# via
|
||||||
# ipykernel
|
# ipykernel
|
||||||
@ -306,6 +322,8 @@ traitlets==5.9.0
|
|||||||
# qtconsole
|
# qtconsole
|
||||||
uri-template==1.2.0
|
uri-template==1.2.0
|
||||||
# via jsonschema
|
# via jsonschema
|
||||||
|
virtualenv==20.20.0
|
||||||
|
# via pre-commit
|
||||||
wcwidth==0.2.6
|
wcwidth==0.2.6
|
||||||
# via prompt-toolkit
|
# via prompt-toolkit
|
||||||
webcolors==1.12
|
webcolors==1.12
|
||||||
|
@ -16,9 +16,9 @@ backoff==2.2.1
|
|||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# argilla
|
# argilla
|
||||||
boto3==1.26.80
|
boto3==1.26.82
|
||||||
# via unstructured (setup.py)
|
# via unstructured (setup.py)
|
||||||
botocore==1.29.80
|
botocore==1.29.82
|
||||||
# via
|
# via
|
||||||
# boto3
|
# boto3
|
||||||
# s3transfer
|
# s3transfer
|
||||||
|
@ -8,7 +8,7 @@ anyio==3.6.2
|
|||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# httpcore
|
# httpcore
|
||||||
argilla==1.3.0
|
argilla==1.3.1
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
@ -33,10 +33,6 @@ click==8.1.3
|
|||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# nltk
|
# nltk
|
||||||
colorama==0.4.6
|
|
||||||
# via
|
|
||||||
# click
|
|
||||||
# tqdm
|
|
||||||
deprecated==1.2.13
|
deprecated==1.2.13
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
@ -63,6 +59,10 @@ idna==3.4
|
|||||||
# anyio
|
# anyio
|
||||||
# requests
|
# requests
|
||||||
# rfc3986
|
# rfc3986
|
||||||
|
importlib-metadata==6.0.0
|
||||||
|
# via
|
||||||
|
# -r requirements/base.txt
|
||||||
|
# markdown
|
||||||
joblib==1.2.0
|
joblib==1.2.0
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
@ -73,6 +73,10 @@ lxml==4.9.2
|
|||||||
# python-docx
|
# python-docx
|
||||||
# python-pptx
|
# python-pptx
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
|
markdown==3.4.1
|
||||||
|
# via
|
||||||
|
# -r requirements/base.txt
|
||||||
|
# unstructured (setup.py)
|
||||||
monotonic==1.6
|
monotonic==1.6
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
@ -104,7 +108,7 @@ pillow==9.4.0
|
|||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# python-pptx
|
# python-pptx
|
||||||
# unstructured (setup.py)
|
# unstructured (setup.py)
|
||||||
pydantic==1.10.4
|
pydantic==1.10.5
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# argilla
|
# argilla
|
||||||
@ -158,7 +162,7 @@ tqdm==4.64.1
|
|||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# argilla
|
# argilla
|
||||||
# nltk
|
# nltk
|
||||||
typing-extensions==4.4.0
|
typing-extensions==4.5.0
|
||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# pydantic
|
# pydantic
|
||||||
@ -177,3 +181,7 @@ xlsxwriter==3.0.8
|
|||||||
# via
|
# via
|
||||||
# -r requirements/base.txt
|
# -r requirements/base.txt
|
||||||
# python-pptx
|
# python-pptx
|
||||||
|
zipp==3.15.0
|
||||||
|
# via
|
||||||
|
# -r requirements/base.txt
|
||||||
|
# importlib-metadata
|
||||||
|
Loading…
x
Reference in New Issue
Block a user