mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-07-08 01:22:43 +00:00

### Summary

Applies tar extraction filters when running on Python 3.12 or above. Extraction filters were added to the [Python `tarfile` library in 3.12](https://docs.python.org/3/library/tarfile.html#extraction-filters) and guard against malicious content being extracted from `.tar.gz` files.

### Testing

Added a smoke test. If it passes for all Python versions, we're good.
16 lines
448 B
Python
16 lines
448 B
Python
import os
|
|
import tarfile
|
|
|
|
from unstructured.ingest.utils.compression import uncompress_tar_file
|
|
|
|
|
|
def test_uncompress_tar_file(tmpdir):
    """Smoke-test `uncompress_tar_file` on a freshly built gzip-compressed archive.

    Packs one example document into an archive created inside `tmpdir`, calls
    `uncompress_tar_file` with `path` set to `tmpdir.dirname`, and checks that the
    call hands back that same directory.
    """
    source_file = "example-docs/fake-text.txt"
    archive_path = os.path.join(tmpdir, "test.tar")
    extract_dir = tmpdir.dirname

    # "w:gz" writes a gzip-compressed archive even though the file name ends in ".tar".
    with tarfile.open(archive_path, "w:gz") as archive:
        # Store the member under its bare file name, without the "example-docs/" prefix.
        archive.add(source_file, arcname=os.path.basename(source_file))

    returned_path = uncompress_tar_file(archive_path, path=extract_dir)
    assert returned_path == extract_dir
|