feat: Add html escape quotes to cleaning brick (#84)

* feat: Add html escape quotes to cleaning brick

* bump changelog
This commit is contained in:
Matt Robinson 2022-11-29 10:58:31 -05:00 committed by GitHub
parent 8bb4b02053
commit c62f18c0d0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 5 additions and 2 deletions

View File

@ -1,9 +1,10 @@
## 0.3.0-dev4
## 0.3.0-dev5
* Implement staging brick for Argilla.
* Removing the local PDF parsing code and any dependencies and tests.
* Reorganizes the staging bricks in the unstructured.partition module
* Allow entities to be passed into the Datasaur staging brick
* Added HTML escapes to the `replace_unicode_quotes` brick
## 0.2.6

View File

@ -22,6 +22,7 @@ def test_clean_bullets(text, expected):
[
("\x93A lovely quote!\x94", "“A lovely quote!”"),
("\x91A lovely quote!\x92", "‘A lovely quote!’"),
("Our dog&apos;s bowl.", "Our dog's bowl."),
],
)
def test_replace_unicode_quotes(text, expected):

View File

@ -1 +1 @@
__version__ = "0.3.0-dev4" # pragma: no cover
__version__ = "0.3.0-dev5" # pragma: no cover

View File

@ -33,6 +33,7 @@ def replace_unicode_quotes(text) -> str:
text = text.replace("\x92", "’")
text = text.replace("\x93", "“")
text = text.replace("\x94", "”")
text = text.replace("&apos;", "'")
return text