mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-06-27 02:30:08 +00:00
feat: add .pre-commit-config.yaml
to let users enable pre-commit
hooks (#320)
Per the README, provides an optional `pre-commit` configuration file to ensure code matches the formatting and linting standards used in `unstructured`.
This commit is contained in:
parent
f5af87a540
commit
2979e17aa4
37
.pre-commit-config.yaml
Normal file
37
.pre-commit-config.yaml
Normal file
@ -0,0 +1,37 @@
|
||||
# Optional pre-commit configuration: runs the project's formatting and
# linting tools against staged files before each commit.
# Enable with `pre-commit install`; disable with `pre-commit uninstall`.
repos:
  # General-purpose file hygiene checks.
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: "v4.3.0"
    hooks:
      - id: check-added-large-files
      - id: check-toml
      - id: check-yaml
      - id: check-json
      - id: check-xml
      - id: end-of-file-fixer
        # `files` is the pre-commit key that restricts a hook to matching
        # paths; the previous `include` key is not a recognized hook option,
        # so the restriction to Python files was not being applied.
        files: \.py$
      - id: trailing-whitespace
      - id: mixed-line-ending

  # Code formatter.
  - repo: https://github.com/psf/black
    rev: 22.10.0
    hooks:
      - id: black
        args: ["--line-length=100"]
        language_version: python3

  # Linter with auto-fix for the selected rule families.
  - repo: https://github.com/charliermarsh/ruff-pre-commit
    rev: "v0.0.230"
    hooks:
      - id: ruff
        args:
          [
            "--fix",
            "--select=I,UP015,UP032,UP034,UP018,COM,C4,PT,SIM,PLR0402",
            "--ignore=PT011,PT012,SIM117",
          ]

  # Style checker.
  - repo: https://github.com/pycqa/flake8
    rev: 4.0.1
    hooks:
      - id: flake8
        language_version: python3
|
@ -121,6 +121,15 @@ locally if you are planning to contribute to the project.
|
||||
* For processing image files, `tesseract` is required. See [here](https://tesseract-ocr.github.io/tessdoc/Installation.html) for installation instructions.
|
||||
* For processing PDF files, `tesseract` and `poppler` are required. The [pdf2image docs](https://pdf2image.readthedocs.io/en/latest/installation.html) have instructions on installing `poppler` across various platforms.
|
||||
|
||||
Additionally, if you're planning to contribute to `unstructured`, we provide an optional `pre-commit` configuration
|
||||
file to ensure your code matches the formatting and linting standards used in `unstructured`.
|
||||
If you'd prefer not to have code changes auto-tidied before every commit, you can use `make check` to see
|
||||
whether any linting or formatting changes should be applied, and `make tidy` to apply them.
|
||||
|
||||
If using the optional `pre-commit`, you'll just need to install the hooks with `pre-commit install` since the
|
||||
`pre-commit` package is installed as part of `make install` mentioned above. Finally, if you decide to use `pre-commit`,
|
||||
you can also uninstall the hooks with `pre-commit uninstall`.
|
||||
|
||||
## :clap: Quick Tour
|
||||
|
||||
You can run this [Colab notebook](https://colab.research.google.com/drive/1U8VCjY2-x8c6y5TYMbSFtQGlQVFHCVIW) to run the examples below.
|
||||
|
@ -6,7 +6,7 @@
|
||||
#
|
||||
alabaster==0.7.13
|
||||
# via sphinx
|
||||
babel==2.11.0
|
||||
babel==2.12.1
|
||||
# via sphinx
|
||||
beautifulsoup4==4.11.2
|
||||
# via furo
|
||||
|
@ -6,7 +6,7 @@
|
||||
#
|
||||
alabaster==0.7.13
|
||||
# via sphinx
|
||||
babel==2.11.0
|
||||
babel==2.12.1
|
||||
# via sphinx
|
||||
beautifulsoup4==4.11.2
|
||||
# via furo
|
||||
|
@ -1,7 +1,7 @@
|
||||
jupyter
|
||||
ipython
|
||||
pip-tools
|
||||
|
||||
pre-commit
|
||||
# NOTE(robinson) - Required pins for security scans
|
||||
jupyter-core>=4.11.2
|
||||
wheel>=0.38.1
|
||||
|
@ -33,6 +33,8 @@ build==0.10.0
|
||||
# via pip-tools
|
||||
cffi==1.15.1
|
||||
# via argon2-cffi-bindings
|
||||
cfgv==3.3.1
|
||||
# via pre-commit
|
||||
click==8.1.3
|
||||
# via pip-tools
|
||||
comm==0.1.2
|
||||
@ -43,12 +45,18 @@ decorator==5.1.1
|
||||
# via ipython
|
||||
defusedxml==0.7.1
|
||||
# via nbconvert
|
||||
distlib==0.3.6
|
||||
# via virtualenv
|
||||
executing==1.2.0
|
||||
# via stack-data
|
||||
fastjsonschema==2.16.3
|
||||
# via nbformat
|
||||
filelock==3.9.0
|
||||
# via virtualenv
|
||||
fqdn==1.5.1
|
||||
# via jsonschema
|
||||
identify==2.5.18
|
||||
# via pre-commit
|
||||
idna==3.4
|
||||
# via
|
||||
# anyio
|
||||
@ -67,7 +75,7 @@ ipykernel==6.21.2
|
||||
# nbclassic
|
||||
# notebook
|
||||
# qtconsole
|
||||
ipython==8.10.0
|
||||
ipython==8.11.0
|
||||
# via
|
||||
# -r requirements/dev.in
|
||||
# ipykernel
|
||||
@ -166,6 +174,8 @@ nest-asyncio==1.5.6
|
||||
# ipykernel
|
||||
# nbclassic
|
||||
# notebook
|
||||
nodeenv==1.7.0
|
||||
# via pre-commit
|
||||
notebook==6.5.2
|
||||
# via jupyter
|
||||
notebook-shim==0.2.2
|
||||
@ -185,18 +195,22 @@ pexpect==4.8.0
|
||||
# via ipython
|
||||
pickleshare==0.7.5
|
||||
# via ipython
|
||||
pip-tools==6.12.2
|
||||
pip-tools==6.12.3
|
||||
# via -r requirements/dev.in
|
||||
pkgutil-resolve-name==1.3.10
|
||||
# via jsonschema
|
||||
platformdirs==3.0.0
|
||||
# via jupyter-core
|
||||
# via
|
||||
# jupyter-core
|
||||
# virtualenv
|
||||
pre-commit==3.1.1
|
||||
# via -r requirements/dev.in
|
||||
prometheus-client==0.16.0
|
||||
# via
|
||||
# jupyter-server
|
||||
# nbclassic
|
||||
# notebook
|
||||
prompt-toolkit==3.0.37
|
||||
prompt-toolkit==3.0.38
|
||||
# via
|
||||
# ipython
|
||||
# jupyter-console
|
||||
@ -227,7 +241,9 @@ python-dateutil==2.8.2
|
||||
python-json-logger==2.0.7
|
||||
# via jupyter-events
|
||||
pyyaml==6.0
|
||||
# via jupyter-events
|
||||
# via
|
||||
# jupyter-events
|
||||
# pre-commit
|
||||
pyzmq==25.0.0
|
||||
# via
|
||||
# ipykernel
|
||||
@ -306,6 +322,8 @@ traitlets==5.9.0
|
||||
# qtconsole
|
||||
uri-template==1.2.0
|
||||
# via jsonschema
|
||||
virtualenv==20.20.0
|
||||
# via pre-commit
|
||||
wcwidth==0.2.6
|
||||
# via prompt-toolkit
|
||||
webcolors==1.12
|
||||
|
@ -16,9 +16,9 @@ backoff==2.2.1
|
||||
# via
|
||||
# -r requirements/base.txt
|
||||
# argilla
|
||||
boto3==1.26.80
|
||||
boto3==1.26.82
|
||||
# via unstructured (setup.py)
|
||||
botocore==1.29.80
|
||||
botocore==1.29.82
|
||||
# via
|
||||
# boto3
|
||||
# s3transfer
|
||||
|
@ -8,7 +8,7 @@ anyio==3.6.2
|
||||
# via
|
||||
# -r requirements/base.txt
|
||||
# httpcore
|
||||
argilla==1.3.0
|
||||
argilla==1.3.1
|
||||
# via
|
||||
# -r requirements/base.txt
|
||||
# unstructured (setup.py)
|
||||
@ -33,10 +33,6 @@ click==8.1.3
|
||||
# via
|
||||
# -r requirements/base.txt
|
||||
# nltk
|
||||
colorama==0.4.6
|
||||
# via
|
||||
# click
|
||||
# tqdm
|
||||
deprecated==1.2.13
|
||||
# via
|
||||
# -r requirements/base.txt
|
||||
@ -63,6 +59,10 @@ idna==3.4
|
||||
# anyio
|
||||
# requests
|
||||
# rfc3986
|
||||
importlib-metadata==6.0.0
|
||||
# via
|
||||
# -r requirements/base.txt
|
||||
# markdown
|
||||
joblib==1.2.0
|
||||
# via
|
||||
# -r requirements/base.txt
|
||||
@ -73,6 +73,10 @@ lxml==4.9.2
|
||||
# python-docx
|
||||
# python-pptx
|
||||
# unstructured (setup.py)
|
||||
markdown==3.4.1
|
||||
# via
|
||||
# -r requirements/base.txt
|
||||
# unstructured (setup.py)
|
||||
monotonic==1.6
|
||||
# via
|
||||
# -r requirements/base.txt
|
||||
@ -104,7 +108,7 @@ pillow==9.4.0
|
||||
# -r requirements/base.txt
|
||||
# python-pptx
|
||||
# unstructured (setup.py)
|
||||
pydantic==1.10.4
|
||||
pydantic==1.10.5
|
||||
# via
|
||||
# -r requirements/base.txt
|
||||
# argilla
|
||||
@ -158,7 +162,7 @@ tqdm==4.64.1
|
||||
# -r requirements/base.txt
|
||||
# argilla
|
||||
# nltk
|
||||
typing-extensions==4.4.0
|
||||
typing-extensions==4.5.0
|
||||
# via
|
||||
# -r requirements/base.txt
|
||||
# pydantic
|
||||
@ -177,3 +181,7 @@ xlsxwriter==3.0.8
|
||||
# via
|
||||
# -r requirements/base.txt
|
||||
# python-pptx
|
||||
zipp==3.15.0
|
||||
# via
|
||||
# -r requirements/base.txt
|
||||
# importlib-metadata
|
||||
|
Loading…
x
Reference in New Issue
Block a user