chore: lint for print statements in ingest code (#2215)

### Description
Given the filtering in the ingest logger, anything going to the console
should go through that logger. This adds a linter that checks for
`print()` statements only in the ingest code and ignores them elsewhere
for now.
This commit is contained in:
Roman Isecke 2023-12-05 11:42:23 -05:00 committed by GitHub
parent 8fa5cbf036
commit c5cb216ac8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 26 additions and 12 deletions

View File

@ -1,4 +1,4 @@
## 0.11.4-dev2 ## 0.11.4-dev3
### Enhancements ### Enhancements

View File

@ -339,7 +339,7 @@ test-extra-xlsx:
## check: runs linters (includes tests) ## check: runs linters (includes tests)
.PHONY: check .PHONY: check
check: check-ruff check-black check-flake8 check-version check: check-ruff check-black check-flake8 check-version check-flake8-print
.PHONY: check-black .PHONY: check-black
check-black: check-black:
@ -349,6 +349,12 @@ check-black:
check-flake8: check-flake8:
flake8 . flake8 .
# Check for print statements in ingest since anything going to console should be using the ingest logger
# as it has a built in filter to redact sensitive information
.PHONY: check-flake8-print
check-flake8-print:
flake8 --per-file-ignores "" ./unstructured/ingest
.PHONY: check-ruff .PHONY: check-ruff
check-ruff: check-ruff:
ruff . --select C4,COM,E,F,I,PLR0402,PT,SIM,UP015,UP018,UP032,UP034 --ignore COM812,PT011,PT012,SIM117 ruff . --select C4,COM,E,F,I,PLR0402,PT,SIM,UP015,UP018,UP032,UP034 --ignore COM812,PT011,PT012,SIM117

View File

@ -8,6 +8,7 @@ coverage
click>=8.1 click>=8.1
types-click types-click
flake8 flake8
flake8-print
freezegun freezegun
label_studio_sdk label_studio_sdk
mypy mypy

View File

@ -31,6 +31,10 @@ coverage[toml]==7.3.2
exceptiongroup==1.2.0 exceptiongroup==1.2.0
# via pytest # via pytest
flake8==6.1.0 flake8==6.1.0
# via
# -r test.in
# flake8-print
flake8-print==5.0.0
# via -r test.in # via -r test.in
freezegun==1.2.2 freezegun==1.2.2
# via -r test.in # via -r test.in
@ -77,7 +81,9 @@ platformdirs==3.10.0
pluggy==1.3.0 pluggy==1.3.0
# via pytest # via pytest
pycodestyle==2.11.1 pycodestyle==2.11.1
# via flake8 # via
# flake8
# flake8-print
pydantic==1.10.13 pydantic==1.10.13
# via # via
# -c constraints.in # -c constraints.in

View File

@ -6,6 +6,8 @@ max-line-length = 100
exclude = exclude =
.venv .venv
unstructured-inference unstructured-inference
per-file-ignores =
*: T20
[tool:pytest] [tool:pytest]
filterwarnings = filterwarnings =

View File

@ -18,7 +18,6 @@ fi
CONTAINER=utic-ingest-test-fixtures-output CONTAINER=utic-ingest-test-fixtures-output
DIRECTORY=$(uuidgen) DIRECTORY=$(uuidgen)
DIRECTORY="test"
REMOTE_URL_RAW="$CONTAINER/$DIRECTORY/" REMOTE_URL_RAW="$CONTAINER/$DIRECTORY/"
REMOTE_URL="abfs://$REMOTE_URL_RAW" REMOTE_URL="abfs://$REMOTE_URL_RAW"

View File

@ -36,7 +36,7 @@ def check(connection_string: str, container: str, blob_path: str, expected_files
) )
blob_json_list = [ blob_json_list = [
b.name b.name
for b in list(container_client.list_blobs(name_starts_with="test")) for b in list(container_client.list_blobs(name_starts_with=blob_path))
if b.name.endswith("json") if b.name.endswith("json")
] ]
found = len(blob_json_list) found = len(blob_json_list)

View File

@ -1 +1 @@
__version__ = "0.11.4-dev2" # pragma: no cover __version__ = "0.11.4-dev3" # pragma: no cover

View File

@ -438,8 +438,8 @@ class SharepointPermissionsConnector:
if response.status_code == 200: if response.status_code == 200:
return response.json() return response.json()
else: else:
print(f"Request failed with status code {response.status_code}:") logger.info(f"Request failed with status code {response.status_code}:")
print(response.text) logger.info(response.text)
@requires_dependencies(["requests"], extras="sharepoint") @requires_dependencies(["requests"], extras="sharepoint")
def get_sites(self): def get_sites(self):
@ -529,14 +529,14 @@ class SharepointPermissionsConnector:
sites = [(site["id"], site["webUrl"]) for site in self.get_sites()["value"]] sites = [(site["id"], site["webUrl"]) for site in self.get_sites()["value"]]
drive_ids = [] drive_ids = []
print("Obtaining drive data for sites for permissions (rbac)") logger.info("Obtaining drive data for sites for permissions (rbac)")
for site_id, site_url in sites: for site_id, site_url in sites:
drives = self.get_drives(site_id) drives = self.get_drives(site_id)
if drives: if drives:
drives_for_site = drives["value"] drives_for_site = drives["value"]
drive_ids.extend([(site_id, drive["id"]) for drive in drives_for_site]) drive_ids.extend([(site_id, drive["id"]) for drive in drives_for_site])
print("Obtaining item data from drives for permissions (rbac)") logger.info("Obtaining item data from drives for permissions (rbac)")
item_ids = [] item_ids = []
for site, drive_id in drive_ids: for site, drive_id in drive_ids:
drive_items = self.get_drive_items(site, drive_id) drive_items = self.get_drive_items(site, drive_id)
@ -550,7 +550,7 @@ class SharepointPermissionsConnector:
permissions_dir = Path(output_dir) / "permissions_data" permissions_dir = Path(output_dir) / "permissions_data"
print("Writing permissions data to disk") logger.info("Writing permissions data to disk")
for site, drive_id, item_id, item_name, item_web_url in item_ids: for site, drive_id, item_id, item_name, item_web_url in item_ids:
res = self.get_permissions_for_drive_item(site, drive_id, item_id) res = self.get_permissions_for_drive_item(site, drive_id, item_id)
if res: if res:

View File

@ -230,7 +230,7 @@ class WriteConfig(BaseConfig):
else: else:
self._session_handle = global_write_session_handle self._session_handle = global_write_session_handle
except Exception as e: except Exception as e:
print("Global session handle creation error") logger.info("Global session handle creation error")
raise (e) raise (e)