mirror of
				https://github.com/Unstructured-IO/unstructured.git
				synced 2025-10-30 17:38:13 +00:00 
			
		
		
		
	 0e16bf4bf0
			
		
	
	
		0e16bf4bf0
		
			
		
	
	
	
	
		
			
			### Summary
			Applies tar extraction filters when running on Python 3.12 or above. Extraction filters were added to the [Python `tarfile` library in 3.12](https://docs.python.org/3/library/tarfile.html#extraction-filters) and guard against malicious content being extracted from `.tar.gz` files.

			### Testing
			Added a smoke test. If it passes on all supported Python versions, we're good.
		
			
				
	
	
		
			16 lines
		
	
	
		
			448 B
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			16 lines
		
	
	
		
			448 B
		
	
	
	
		
			Python
		
	
	
	
	
	
| import os
 | |
| import tarfile
 | |
| 
 | |
| from unstructured.ingest.utils.compression import uncompress_tar_file
 | |
| 
 | |
| 
 | |
def test_uncompress_tar_file(tmpdir):
    """Smoke-test ``uncompress_tar_file`` on a small gzip-compressed tarball.

    Builds an archive holding a single example document, extracts it into a
    directory owned by this test, and checks both the returned path and the
    extracted file.

    Args:
        tmpdir: pytest's per-test temporary directory fixture.
    """
    source_file = "example-docs/fake-text.txt"
    member_name = os.path.basename(source_file)

    # The archive is written with gzip compression, so name it accordingly.
    tar_filename = os.path.join(tmpdir, "test.tar.gz")
    with tarfile.open(tar_filename, "w:gz") as tar:
        tar.add(source_file, arcname=member_name)

    # Extract inside this test's own directory; the parent directory
    # (``tmpdir.dirname``) lies outside this test's sandbox.
    extract_dir = str(tmpdir.mkdir("extracted"))

    path = uncompress_tar_file(tar_filename, path=extract_dir)

    assert path == extract_dir

    # Returning the right path is not enough: confirm the member was
    # actually extracted and its contents survived the round trip.
    extracted_file = os.path.join(extract_dir, member_name)
    assert os.path.isfile(extracted_file)
    with open(source_file, "rb") as expected, open(extracted_file, "rb") as actual:
        assert actual.read() == expected.read()
 |