Mirror of https://github.com/Unstructured-IO/unstructured.git, synced 2025-06-27 02:30:08 +00:00
chore: lint for print statements in ingest code (#2215)
### Description

Given the filtering in the ingest logger, anything going to console should go through that. This adds a linter that checks for `print()` statements in the ingest code only and ignores them elsewhere for now.
This commit is contained in:
parent 8fa5cbf036
commit c5cb216ac8
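The description leans on the ingest logger's built-in filtering: console output routed through it can be scrubbed before it is emitted, which a bare `print()` bypasses. The sketch below is illustrative only; the class name, patterns, and logger name are hypothetical and are not the filter actually implemented by unstructured's ingest logger.

```python
import logging
import re


class RedactingFilter(logging.Filter):
    """Illustrative only: masks values of sensitive-looking keys before a record is emitted."""

    # Hypothetical patterns; the real ingest logger defines its own redaction rules.
    _PATTERNS = [
        re.compile(
            r'("?(?:password|token|api_key|client_secret)"?\s*[:=]\s*)("[^"]*"|\S+)',
            re.IGNORECASE,
        ),
    ]

    def filter(self, record: logging.LogRecord) -> bool:
        msg = record.getMessage()
        for pattern in self._PATTERNS:
            msg = pattern.sub(r"\1***REDACTED***", msg)
        record.msg, record.args = msg, None
        return True  # keep the (now redacted) record


logger = logging.getLogger("ingest-example")  # hypothetical logger name
logger.addFilter(RedactingFilter())
logging.basicConfig(level=logging.INFO)
logger.info('connecting with {"client_secret": "abc123"}')  # the secret is masked in the output
```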
@@ -1,4 +1,4 @@
-## 0.11.4-dev2
+## 0.11.4-dev3
 
 ### Enhancements
 
Makefile (8 changed lines)
@@ -339,7 +339,7 @@ test-extra-xlsx:
 
 ## check: runs linters (includes tests)
 .PHONY: check
-check: check-ruff check-black check-flake8 check-version
+check: check-ruff check-black check-flake8 check-version check-flake8-print
 
 .PHONY: check-black
 check-black:
@@ -349,6 +349,12 @@ check-black:
 check-flake8:
 	flake8 .
 
+# Check for print statements in ingest since anything going to console should be using the ingest logger
+# as it has a built in filter to redact sensitive information
+.PHONY: check-flake8-print
+check-flake8-print:
+	flake8 --per-file-ignores "" ./unstructured/ingest
+
 .PHONY: check-ruff
 check-ruff:
 	ruff . --select C4,COM,E,F,I,PLR0402,PT,SIM,UP015,UP018,UP032,UP034 --ignore COM812,PT011,PT012,SIM117
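Taken together with the `per-file-ignores = *: T20` entry added to setup.cfg further down in this diff, a plain `flake8 .` keeps ignoring flake8-print's T20x codes repo-wide, while the new target appears intended to re-enable them just for `./unstructured/ingest` by overriding the config with an empty `--per-file-ignores ""`. A minimal illustration of what the new check would flag follows; the file path and function are made up for the example, and the import path of the ingest logger is an assumption (T201 is flake8-print 5.x's "print found" code):

```python
# unstructured/ingest/example_connector.py -- hypothetical module, for illustration only
from unstructured.ingest.logger import logger  # assumed location of the shared ingest logger


def report_status(status_code: int) -> None:
    print(f"Request failed with status code {status_code}")  # flagged by flake8-print as T201
    logger.info(f"Request failed with status code {status_code}")  # passes the check
```

Under that reading, `make check-flake8-print` would report the `print()` line, while `make check-flake8` alone would not, because of the blanket T20 ignore.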
@@ -8,6 +8,7 @@ coverage
 click>=8.1
 types-click
 flake8
+flake8-print
 freezegun
 label_studio_sdk
 mypy
@@ -31,6 +31,10 @@ coverage[toml]==7.3.2
 exceptiongroup==1.2.0
     # via pytest
 flake8==6.1.0
+    # via
+    #   -r test.in
+    #   flake8-print
+flake8-print==5.0.0
     # via -r test.in
 freezegun==1.2.2
     # via -r test.in
@@ -77,7 +81,9 @@ platformdirs==3.10.0
 pluggy==1.3.0
     # via pytest
 pycodestyle==2.11.1
-    # via flake8
+    # via
+    #   flake8
+    #   flake8-print
 pydantic==1.10.13
     # via
     #   -c constraints.in
@@ -6,6 +6,8 @@ max-line-length = 100
 exclude =
     .venv
     unstructured-inference
+per-file-ignores =
+    *: T20
 
 [tool:pytest]
 filterwarnings =
@@ -18,7 +18,6 @@ fi
 
 CONTAINER=utic-ingest-test-fixtures-output
 DIRECTORY=$(uuidgen)
-DIRECTORY="test"
 REMOTE_URL_RAW="$CONTAINER/$DIRECTORY/"
 REMOTE_URL="abfs://$REMOTE_URL_RAW"
 
@@ -36,7 +36,7 @@ def check(connection_string: str, container: str, blob_path: str, expected_files
     )
     blob_json_list = [
         b.name
-        for b in list(container_client.list_blobs(name_starts_with="test"))
+        for b in list(container_client.list_blobs(name_starts_with=blob_path))
         if b.name.endswith("json")
     ]
     found = len(blob_json_list)
@@ -1 +1 @@
-__version__ = "0.11.4-dev2"  # pragma: no cover
+__version__ = "0.11.4-dev3"  # pragma: no cover
@@ -438,8 +438,8 @@ class SharepointPermissionsConnector:
         if response.status_code == 200:
             return response.json()
         else:
-            print(f"Request failed with status code {response.status_code}:")
-            print(response.text)
+            logger.info(f"Request failed with status code {response.status_code}:")
+            logger.info(response.text)
 
     @requires_dependencies(["requests"], extras="sharepoint")
     def get_sites(self):
@@ -529,14 +529,14 @@ class SharepointPermissionsConnector:
         sites = [(site["id"], site["webUrl"]) for site in self.get_sites()["value"]]
         drive_ids = []
 
-        print("Obtaining drive data for sites for permissions (rbac)")
+        logger.info("Obtaining drive data for sites for permissions (rbac)")
         for site_id, site_url in sites:
            drives = self.get_drives(site_id)
            if drives:
                drives_for_site = drives["value"]
                drive_ids.extend([(site_id, drive["id"]) for drive in drives_for_site])
 
-        print("Obtaining item data from drives for permissions (rbac)")
+        logger.info("Obtaining item data from drives for permissions (rbac)")
         item_ids = []
         for site, drive_id in drive_ids:
             drive_items = self.get_drive_items(site, drive_id)
@@ -550,7 +550,7 @@ class SharepointPermissionsConnector:
 
         permissions_dir = Path(output_dir) / "permissions_data"
 
-        print("Writing permissions data to disk")
+        logger.info("Writing permissions data to disk")
         for site, drive_id, item_id, item_name, item_web_url in item_ids:
             res = self.get_permissions_for_drive_item(site, drive_id, item_id)
             if res:
@@ -230,7 +230,7 @@ class WriteConfig(BaseConfig):
             else:
                 self._session_handle = global_write_session_handle
         except Exception as e:
-            print("Global session handle creation error")
+            logger.info("Global session handle creation error")
             raise (e)
 
 
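The hunks above call a module-level `logger` whose import sits outside the diff context; in the ingest connectors this is presumed to be the shared ingest logger rather than a bare `logging.getLogger`. One practical consequence of the switch, sketched with stdlib logging only (the logger name is an assumption):

```python
import logging

# Stand-in for the ingest logger; the real one (name assumed to be "unstructured.ingest")
# also carries the redaction filter mentioned in the Makefile comment above.
logger = logging.getLogger("unstructured.ingest")
logging.basicConfig(level=logging.INFO)

logger.info("Writing permissions data to disk")  # emitted at INFO, like the replaced print()

# Unlike print(), connector chatter can now be silenced per run:
logging.getLogger("unstructured.ingest").setLevel(logging.WARNING)
logger.info("this status message is suppressed")
```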
|
Loading…
x
Reference in New Issue
Block a user