mirror of https://github.com/Unstructured-IO/unstructured.git
synced 2025-06-27 02:30:08 +00:00
chore: lint for print statements in ingest code (#2215)
### Description
Given the filtering in the ingest logger, anything going to the console should go through it. This adds a linter that checks for `print()` statements only in the ingest code and ignores them elsewhere for now.
This commit is contained in:
parent
8fa5cbf036
commit
c5cb216ac8
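
The "filtering in the ingest logger" mentioned in the description is why console output should not use `print()`: records routed through the logger can be scrubbed before they are emitted, while `print()` bypasses that path entirely. A rough sketch of the idea only, not the project's actual implementation (the filter class, regex, and logger name below are assumptions):

```python
import logging
import re


class RedactSecretsFilter(logging.Filter):
    """Illustrative only: mask obvious key=value secrets before a record is emitted."""

    _pattern = re.compile(r"(token|password|secret|key)=\S+", re.IGNORECASE)

    def filter(self, record: logging.LogRecord) -> bool:
        record.msg = self._pattern.sub(r"\1=***REDACTED***", str(record.msg))
        return True  # keep the record; only the message text is altered


# "unstructured.ingest" as the logger name is an assumption for this sketch.
logger = logging.getLogger("unstructured.ingest")
handler = logging.StreamHandler()
handler.addFilter(RedactSecretsFilter())
logger.addHandler(handler)
logger.setLevel(logging.INFO)

logger.info("connecting with token=abc123")  # console shows: connecting with token=***REDACTED***
print("connecting with token=abc123")        # goes straight to stdout, unredacted -- what the new lint flags
```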
@@ -1,4 +1,4 @@
-## 0.11.4-dev2
+## 0.11.4-dev3
 
 ### Enhancements
 
Makefile
@@ -339,7 +339,7 @@ test-extra-xlsx:
 
 ## check: runs linters (includes tests)
 .PHONY: check
-check: check-ruff check-black check-flake8 check-version
+check: check-ruff check-black check-flake8 check-version check-flake8-print
 
 .PHONY: check-black
 check-black:
@@ -349,6 +349,12 @@ check-black:
 check-flake8:
 	flake8 .
 
+# Check for print statements in ingest since anything going to console should be using the ingest logger
+# as it has a built in filter to redact sensitive information
+.PHONY: check-flake8-print
+check-flake8-print:
+	flake8 --per-file-ignores "" ./unstructured/ingest
+
 .PHONY: check-ruff
 check-ruff:
 	ruff . --select C4,COM,E,F,I,PLR0402,PT,SIM,UP015,UP018,UP032,UP034 --ignore COM812,PT011,PT012,SIM117
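
Two pieces make the new target work: a later hunk adds `per-file-ignores = *: T20` to the flake8 config, so the flake8-print codes (the `T20` prefix) are suppressed for the ordinary `flake8 .` run, while `check-flake8-print` re-checks `./unstructured/ingest` with `--per-file-ignores ""`, clearing that suppression so `print()` calls are reported there. The pattern it enforces mirrors the connector edits further down; a minimal before/after sketch (the function name and import path are illustrative assumptions):

```python
# Illustrative sketch only; the import path follows the ingest package layout as an assumption.
from unstructured.ingest.logger import logger


def report_failure(status_code: int, body: str) -> None:
    # Before: flagged by check-flake8-print, since print() bypasses the ingest
    # logger's redaction filter:
    #   print(f"Request failed with status code {status_code}:")
    #   print(body)

    # After: route console output through the ingest logger instead.
    logger.info(f"Request failed with status code {status_code}:")
    logger.info(body)
```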
@@ -8,6 +8,7 @@ coverage
 click>=8.1
 types-click
 flake8
+flake8-print
 freezegun
 label_studio_sdk
 mypy
@@ -31,6 +31,10 @@ coverage[toml]==7.3.2
 exceptiongroup==1.2.0
     # via pytest
 flake8==6.1.0
-    # via -r test.in
+    # via
+    #   -r test.in
+    #   flake8-print
+flake8-print==5.0.0
+    # via -r test.in
 freezegun==1.2.2
     # via -r test.in
@@ -77,7 +81,9 @@ platformdirs==3.10.0
 pluggy==1.3.0
     # via pytest
 pycodestyle==2.11.1
-    # via flake8
+    # via
+    #   flake8
+    #   flake8-print
 pydantic==1.10.13
     # via
     #   -c constraints.in
@@ -6,6 +6,8 @@ max-line-length = 100
 exclude =
     .venv
     unstructured-inference
+per-file-ignores =
+    *: T20
 
 [tool:pytest]
 filterwarnings =
@@ -18,7 +18,6 @@ fi
 
 CONTAINER=utic-ingest-test-fixtures-output
-DIRECTORY=$(uuidgen)
+DIRECTORY="test"
 REMOTE_URL_RAW="$CONTAINER/$DIRECTORY/"
 REMOTE_URL="abfs://$REMOTE_URL_RAW"
 
@@ -36,7 +36,7 @@ def check(connection_string: str, container: str, blob_path: str, expected_files
     )
     blob_json_list = [
         b.name
-        for b in list(container_client.list_blobs(name_starts_with="test"))
+        for b in list(container_client.list_blobs(name_starts_with=blob_path))
        if b.name.endswith("json")
     ]
     found = len(blob_json_list)
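
The shell and Python changes above work together: the destination directory is now the fixed name "test", and the verification step filters blobs by the `blob_path` argument rather than a hard-coded prefix. A minimal sketch of that check, under the assumption that the script uses azure-storage-blob's `ContainerClient`:

```python
# Sketch only: connection string, container, and blob_path values are placeholders.
from azure.storage.blob import ContainerClient


def count_json_blobs(connection_string: str, container: str, blob_path: str) -> int:
    container_client = ContainerClient.from_connection_string(connection_string, container)
    # Listing is filtered by the destination prefix passed in, not a hard-coded "test".
    blob_json_list = [
        b.name
        for b in container_client.list_blobs(name_starts_with=blob_path)
        if b.name.endswith("json")
    ]
    return len(blob_json_list)
```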
@@ -1 +1 @@
-__version__ = "0.11.4-dev2"  # pragma: no cover
+__version__ = "0.11.4-dev3"  # pragma: no cover
@@ -438,8 +438,8 @@ class SharepointPermissionsConnector:
         if response.status_code == 200:
             return response.json()
         else:
-            print(f"Request failed with status code {response.status_code}:")
-            print(response.text)
+            logger.info(f"Request failed with status code {response.status_code}:")
+            logger.info(response.text)
 
     @requires_dependencies(["requests"], extras="sharepoint")
     def get_sites(self):
@@ -529,14 +529,14 @@ class SharepointPermissionsConnector:
         sites = [(site["id"], site["webUrl"]) for site in self.get_sites()["value"]]
         drive_ids = []
 
-        print("Obtaining drive data for sites for permissions (rbac)")
+        logger.info("Obtaining drive data for sites for permissions (rbac)")
         for site_id, site_url in sites:
             drives = self.get_drives(site_id)
             if drives:
                 drives_for_site = drives["value"]
                 drive_ids.extend([(site_id, drive["id"]) for drive in drives_for_site])
 
-        print("Obtaining item data from drives for permissions (rbac)")
+        logger.info("Obtaining item data from drives for permissions (rbac)")
         item_ids = []
         for site, drive_id in drive_ids:
             drive_items = self.get_drive_items(site, drive_id)
@@ -550,7 +550,7 @@ class SharepointPermissionsConnector:
 
         permissions_dir = Path(output_dir) / "permissions_data"
 
-        print("Writing permissions data to disk")
+        logger.info("Writing permissions data to disk")
         for site, drive_id, item_id, item_name, item_web_url in item_ids:
             res = self.get_permissions_for_drive_item(site, drive_id, item_id)
             if res:
@@ -230,7 +230,7 @@ class WriteConfig(BaseConfig):
             else:
                 self._session_handle = global_write_session_handle
         except Exception as e:
-            print("Global session handle creation error")
+            logger.info("Global session handle creation error")
             raise (e)
 