mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-08-17 21:29:05 +00:00
feat: Add html escape quotes to cleaning brick (#84)
* feat: Add html escape quotes to cleaning brick * bump changelog
This commit is contained in:
parent
8bb4b02053
commit
c62f18c0d0
@ -1,9 +1,10 @@
|
||||
## 0.3.0-dev4
|
||||
## 0.3.0-dev5
|
||||
|
||||
* Implement staging brick for Argilla.
|
||||
* Removing the local PDF parsing code and any dependencies and tests.
|
||||
* Reorganizes the staging bricks in the unstructured.partition module
|
||||
* Allow entities to be passed into the Datasaur staging brick
|
||||
* Added HTML escapes to the `replace_unicode_quotes` brick
|
||||
|
||||
## 0.2.6
|
||||
|
||||
|
@ -22,6 +22,7 @@ def test_clean_bullets(text, expected):
|
||||
[
|
||||
("\x93A lovely quote!\x94", "“A lovely quote!”"),
|
||||
("\x91A lovely quote!\x92", "‘A lovely quote!’"),
|
||||
("Our dog&apos;s bowl.", "Our dog's bowl."),
|
||||
],
|
||||
)
|
||||
def test_replace_unicode_quotes(text, expected):
|
||||
|
@ -1 +1 @@
|
||||
__version__ = "0.3.0-dev4" # pragma: no cover
|
||||
__version__ = "0.3.0-dev5" # pragma: no cover
|
||||
|
@ -33,6 +33,7 @@ def replace_unicode_quotes(text) -> str:
|
||||
text = text.replace("\x92", "’")
|
||||
text = text.replace("\x93", "“")
|
||||
text = text.replace("\x94", "”")
|
||||
text = text.replace("&apos;", "'")
|
||||
return text
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user