mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-08-18 05:37:53 +00:00
feat: Add html escape quotes to cleaning brick (#84)
* feat: Add html escape quotes to cleaning brick * bump changelog
This commit is contained in:
parent
8bb4b02053
commit
c62f18c0d0
@ -1,9 +1,10 @@
|
|||||||
## 0.3.0-dev4
|
## 0.3.0-dev5
|
||||||
|
|
||||||
* Implement staging brick for Argilla.
|
* Implement staging brick for Argilla.
|
||||||
* Removing the local PDF parsing code and any dependencies and tests.
|
* Removing the local PDF parsing code and any dependencies and tests.
|
||||||
* Reorganizes the staging bricks in the unstructured.partition module
|
* Reorganizes the staging bricks in the unstructured.partition module
|
||||||
* Allow entities to be passed into the Datasaur staging brick
|
* Allow entities to be passed into the Datasaur staging brick
|
||||||
|
* Added HTML escapes to the `replace_unicode_quotes` brick
|
||||||
|
|
||||||
## 0.2.6
|
## 0.2.6
|
||||||
|
|
||||||
|
@ -22,6 +22,7 @@ def test_clean_bullets(text, expected):
|
|||||||
[
|
[
|
||||||
("\x93A lovely quote!\x94", "“A lovely quote!”"),
|
("\x93A lovely quote!\x94", "“A lovely quote!”"),
|
||||||
("\x91A lovely quote!\x92", "‘A lovely quote!’"),
|
("\x91A lovely quote!\x92", "‘A lovely quote!’"),
|
||||||
|
("Our dog&apos;s bowl.", "Our dog's bowl."),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_replace_unicode_quotes(text, expected):
|
def test_replace_unicode_quotes(text, expected):
|
||||||
|
@ -1 +1 @@
|
|||||||
__version__ = "0.3.0-dev4" # pragma: no cover
|
__version__ = "0.3.0-dev5" # pragma: no cover
|
||||||
|
@ -33,6 +33,7 @@ def replace_unicode_quotes(text) -> str:
|
|||||||
text = text.replace("\x92", "’")
|
text = text.replace("\x92", "’")
|
||||||
text = text.replace("\x93", "“")
|
text = text.replace("\x93", "“")
|
||||||
text = text.replace("\x94", "”")
|
text = text.replace("\x94", "”")
|
||||||
|
text = text.replace("&apos;", "'")
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user