feat: add .pre-commit-config.yaml to let users enable pre-commit hooks (#320)

Per the README, provides an optional `pre-commit` configuration
file to ensure code matches the formatting and linting standards used in `unstructured`.
This commit is contained in:
Alvaro Bartolome 2023-03-05 21:23:39 +01:00 committed by GitHub
parent f5af87a540
commit 2979e17aa4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 89 additions and 17 deletions

37
.pre-commit-config.yaml Normal file
View File

@ -0,0 +1,37 @@
# Optional pre-commit configuration: runs generic file checks, black, ruff and
# flake8 against staged files. Enable locally with `pre-commit install`.
repos:
  # Generic file hygiene checks.
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: "v4.3.0"
    hooks:
      - id: check-added-large-files
      - id: check-toml
      - id: check-yaml
      - id: check-json
      - id: check-xml
      - id: end-of-file-fixer
        # `files` is the pre-commit key for an inclusion pattern; `include` is
        # not a recognized hook key and would be ignored with a warning,
        # leaving this hook unrestricted.
        files: \.py$
      - id: trailing-whitespace
      - id: mixed-line-ending

  # Code formatting with a 100-character line length.
  - repo: https://github.com/psf/black
    rev: 22.10.0
    hooks:
      - id: black
        args: ["--line-length=100"]
        language_version: python3

  # Linting with auto-fix enabled for the selected rule set.
  - repo: https://github.com/charliermarsh/ruff-pre-commit
    rev: "v0.0.230"
    hooks:
      - id: ruff
        args:
          [
            "--fix",
            "--select=I,UP015,UP032,UP034,UP018,COM,C4,PT,SIM,PLR0402",
            "--ignore=PT011,PT012,SIM117",
          ]

  - repo: https://github.com/pycqa/flake8
    rev: 4.0.1
    hooks:
      - id: flake8
        language_version: python3

View File

@ -121,6 +121,15 @@ locally if you are planning to contribute to the project.
* For processing image files, `tesseract` is required. See [here](https://tesseract-ocr.github.io/tessdoc/Installation.html) for installation instructions.
* For processing PDF files, `tesseract` and `poppler` are required. The [pdf2image docs](https://pdf2image.readthedocs.io/en/latest/installation.html) have instructions on installing `poppler` across various platforms.
Additionally, if you're planning to contribute to `unstructured`, we provide an optional `pre-commit` configuration
file to ensure your code matches the formatting and linting standards used in `unstructured`.
If you'd prefer not to have code changes auto-tidied before every commit, you can use `make check` to see
whether any linting or formatting changes should be applied, and `make tidy` to apply them.
If using the optional `pre-commit`, you'll just need to install the hooks with `pre-commit install`, since the
`pre-commit` package is installed as part of `make install` mentioned above. Finally, if you decide to use `pre-commit`,
you can also uninstall the hooks with `pre-commit uninstall`.
## :clap: Quick Tour
You can run this [Colab notebook](https://colab.research.google.com/drive/1U8VCjY2-x8c6y5TYMbSFtQGlQVFHCVIW) to run the examples below.

View File

@ -6,7 +6,7 @@
#
alabaster==0.7.13
# via sphinx
babel==2.11.0
babel==2.12.1
# via sphinx
beautifulsoup4==4.11.2
# via furo

View File

@ -6,7 +6,7 @@
#
alabaster==0.7.13
# via sphinx
babel==2.11.0
babel==2.12.1
# via sphinx
beautifulsoup4==4.11.2
# via furo

View File

@ -1,7 +1,7 @@
jupyter
ipython
pip-tools
pre-commit
# NOTE(robinson) - Required pins for security scans
jupyter-core>=4.11.2
wheel>=0.38.1

View File

@ -33,6 +33,8 @@ build==0.10.0
# via pip-tools
cffi==1.15.1
# via argon2-cffi-bindings
cfgv==3.3.1
# via pre-commit
click==8.1.3
# via pip-tools
comm==0.1.2
@ -43,12 +45,18 @@ decorator==5.1.1
# via ipython
defusedxml==0.7.1
# via nbconvert
distlib==0.3.6
# via virtualenv
executing==1.2.0
# via stack-data
fastjsonschema==2.16.3
# via nbformat
filelock==3.9.0
# via virtualenv
fqdn==1.5.1
# via jsonschema
identify==2.5.18
# via pre-commit
idna==3.4
# via
# anyio
@ -67,7 +75,7 @@ ipykernel==6.21.2
# nbclassic
# notebook
# qtconsole
ipython==8.10.0
ipython==8.11.0
# via
# -r requirements/dev.in
# ipykernel
@ -166,6 +174,8 @@ nest-asyncio==1.5.6
# ipykernel
# nbclassic
# notebook
nodeenv==1.7.0
# via pre-commit
notebook==6.5.2
# via jupyter
notebook-shim==0.2.2
@ -185,18 +195,22 @@ pexpect==4.8.0
# via ipython
pickleshare==0.7.5
# via ipython
pip-tools==6.12.2
pip-tools==6.12.3
# via -r requirements/dev.in
pkgutil-resolve-name==1.3.10
# via jsonschema
platformdirs==3.0.0
# via jupyter-core
# via
# jupyter-core
# virtualenv
pre-commit==3.1.1
# via -r requirements/dev.in
prometheus-client==0.16.0
# via
# jupyter-server
# nbclassic
# notebook
prompt-toolkit==3.0.37
prompt-toolkit==3.0.38
# via
# ipython
# jupyter-console
@ -227,7 +241,9 @@ python-dateutil==2.8.2
python-json-logger==2.0.7
# via jupyter-events
pyyaml==6.0
# via jupyter-events
# via
# jupyter-events
# pre-commit
pyzmq==25.0.0
# via
# ipykernel
@ -306,6 +322,8 @@ traitlets==5.9.0
# qtconsole
uri-template==1.2.0
# via jsonschema
virtualenv==20.20.0
# via pre-commit
wcwidth==0.2.6
# via prompt-toolkit
webcolors==1.12

View File

@ -16,9 +16,9 @@ backoff==2.2.1
# via
# -r requirements/base.txt
# argilla
boto3==1.26.80
boto3==1.26.82
# via unstructured (setup.py)
botocore==1.29.80
botocore==1.29.82
# via
# boto3
# s3transfer

View File

@ -8,7 +8,7 @@ anyio==3.6.2
# via
# -r requirements/base.txt
# httpcore
argilla==1.3.0
argilla==1.3.1
# via
# -r requirements/base.txt
# unstructured (setup.py)
@ -33,10 +33,6 @@ click==8.1.3
# via
# -r requirements/base.txt
# nltk
colorama==0.4.6
# via
# click
# tqdm
deprecated==1.2.13
# via
# -r requirements/base.txt
@ -63,6 +59,10 @@ idna==3.4
# anyio
# requests
# rfc3986
importlib-metadata==6.0.0
# via
# -r requirements/base.txt
# markdown
joblib==1.2.0
# via
# -r requirements/base.txt
@ -73,6 +73,10 @@ lxml==4.9.2
# python-docx
# python-pptx
# unstructured (setup.py)
markdown==3.4.1
# via
# -r requirements/base.txt
# unstructured (setup.py)
monotonic==1.6
# via
# -r requirements/base.txt
@ -104,7 +108,7 @@ pillow==9.4.0
# -r requirements/base.txt
# python-pptx
# unstructured (setup.py)
pydantic==1.10.4
pydantic==1.10.5
# via
# -r requirements/base.txt
# argilla
@ -158,7 +162,7 @@ tqdm==4.64.1
# -r requirements/base.txt
# argilla
# nltk
typing-extensions==4.4.0
typing-extensions==4.5.0
# via
# -r requirements/base.txt
# pydantic
@ -177,3 +181,7 @@ xlsxwriter==3.0.8
# via
# -r requirements/base.txt
# python-pptx
zipp==3.15.0
# via
# -r requirements/base.txt
# importlib-metadata