fix: open xml files in read only mode (#362)

This commit is contained in:
ryannikolaidis 2023-03-13 13:06:45 -07:00 committed by GitHub
parent 7b9475ef26
commit a4726cb197
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 23 additions and 3 deletions

View File

@ -1,4 +1,4 @@
## 0.5.4-dev6
## 0.5.4-dev7
### Enhancements
@ -25,6 +25,7 @@ from `FsspecConnector`
### Fixes
* Fixes processing for text files with `message/rfc822` MIME type.
* Open XML files in read-only mode when reading their contents to construct an `XMLDocument`.
## 0.5.3

View File

@ -106,3 +106,22 @@ def test_partition_html_on_ideas_page():
document_text = "\n\n".join([str(el) for el in elements])
assert document_text.startswith("January 2023(Someone fed my essays into GPT")
assert document_text.endswith("whole new fractal buds.")
def test_user_without_file_write_permission_can_partition_html(tmp_path, monkeypatch):
    """Partitioning an HTML file must work when the file is read-only (0o444)."""
    source_path = os.path.join(DIRECTORY, "..", "..", "example-docs", "example-10k.html")

    # Start from an empty file inside the temporary directory.
    target = tmp_path / "example-10k-readonly.html"
    target.touch()

    # Give it the same text as the example 10-K document.
    with open(source_path) as source:
        html_text = source.read()
    target.write_text(html_text)

    # Clear every write bit so the file can only be read.
    target.chmod(0o444)

    # Partitioning should succeed without write access to the file.
    elements = partition_html(filename=target.resolve())
    assert len(elements) > 0

View File

@ -1 +1 @@
__version__ = "0.5.4-dev6" # pragma: no cover
__version__ = "0.5.4-dev7" # pragma: no cover

View File

@ -90,6 +90,6 @@ class XMLDocument(Document):
stylesheet: Optional[str] = None,
encoding: Optional[str] = "utf-8",
):
with open(filename, "r+", encoding=encoding) as f:
with open(filename, encoding=encoding) as f:
content = f.read()
return cls.from_string(content, parser=parser, stylesheet=stylesheet)