mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-12-25 14:14:30 +00:00
fix: open xml files in read only mode (#362)
This commit is contained in:
parent
7b9475ef26
commit
a4726cb197
@ -1,4 +1,4 @@
|
||||
## 0.5.4-dev6
|
||||
## 0.5.4-dev7
|
||||
|
||||
### Enhancements
|
||||
|
||||
@ -25,6 +25,7 @@ from `FsspecConnector`
|
||||
### Fixes
|
||||
|
||||
* Fixes processing for text files with `message/rfc822` MIME type.
|
||||
* Open XML files in read-only mode when reading contents to construct an `XMLDocument`.
|
||||
|
||||
## 0.5.3
|
||||
|
||||
|
||||
@ -106,3 +106,22 @@ def test_partition_html_on_ideas_page():
|
||||
document_text = "\n\n".join([str(el) for el in elements])
|
||||
assert document_text.startswith("January 2023(Someone fed my essays into GPT")
|
||||
assert document_text.endswith("whole new fractal buds.")
|
||||
|
||||
|
||||
def test_user_without_file_write_permission_can_partition_html(tmp_path, monkeypatch):
    """Check that partition_html succeeds on a file that is not writable.

    A copy of example-10k.html is placed under pytest's ``tmp_path``, its mode
    is set to 0o444, and the copy is then partitioned. The test passes when at
    least one element is returned.

    ``monkeypatch`` is requested but not used in the body; it is kept so the
    test's signature is unchanged.
    """
    example_filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "example-10k.html")

    # Copy the example document byte-for-byte. Going through text mode would
    # decode and re-encode using the platform's default encoding, which can
    # fail or alter the content where that default is not UTF-8.
    read_only_file = tmp_path / "example-10k-readonly.html"
    with open(example_filename, "rb") as f:
        read_only_file.write_bytes(f.read())

    # set read_only_file to be read only
    read_only_file.chmod(0o444)

    # partition html should still work
    elements = partition_html(filename=read_only_file.resolve())
    assert len(elements) > 0
|
||||
|
||||
@ -1 +1 @@
|
||||
__version__ = "0.5.4-dev6" # pragma: no cover
|
||||
__version__ = "0.5.4-dev7" # pragma: no cover
|
||||
|
||||
@ -90,6 +90,6 @@ class XMLDocument(Document):
|
||||
stylesheet: Optional[str] = None,
|
||||
encoding: Optional[str] = "utf-8",
|
||||
):
|
||||
with open(filename, "r+", encoding=encoding) as f:
|
||||
with open(filename, encoding=encoding) as f:
|
||||
content = f.read()
|
||||
return cls.from_string(content, parser=parser, stylesheet=stylesheet)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user