diff --git a/CHANGELOG.md b/CHANGELOG.md
index cfafcb371..63ed7124a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,4 @@
-## 0.5.13-dev4
+## 0.5.13-dev5
 
 ### Enhancements
 
@@ -6,6 +6,8 @@
 
 ### Features
 
+* Adds the `bytes_string_to_string` cleaning brick for converting byte-string-like output to a string.
+
 ### Fixes
 
 * unstructured-documents encode xml string if document_tree is `None` in `_read_xml`.
diff --git a/docs/source/bricks.rst b/docs/source/bricks.rst
index a0d0a12c2..7b959208e 100644
--- a/docs/source/bricks.rst
+++ b/docs/source/bricks.rst
@@ -801,6 +801,37 @@ Examples:
   # Returns "Look at me, I'm flying!"
   extract_text_after(text, r"SPEAKER \d{1}:")
 
+
+``bytes_string_to_string``
+---------------------------
+
+Converts an output string that looks like a byte string to a string using the specified encoding.
+Such output sometimes occurs in ``partition_html`` when the text contains a character, such as an
+emoji, that the HTML parser does not expect; the raw encoded bytes then appear in the element text.
+
+Examples:
+
+.. code:: python
+
+  from unstructured.cleaners.core import bytes_string_to_string
+
+  text = "Hello ð\x9f\x98\x80"
+  # The output should be "Hello 😀"
+  bytes_string_to_string(text, encoding="utf-8")
+
+
+.. code:: python
+
+  from unstructured.cleaners.core import bytes_string_to_string
+  from unstructured.partition.html import partition_html
+
+  text = """\n<html charset="utf-8"><p>Hello 😀</p></html>"""
+  elements = partition_html(text=text)
+  elements[0].apply(bytes_string_to_string)
+  # The output should be "Hello 😀"
+  elements[0].text
+
+
 ``extract_email_address``
 --------------------------
 
diff --git a/test_unstructured/cleaners/test_core.py b/test_unstructured/cleaners/test_core.py
index 8c28428b1..196aa19a3 100644
--- a/test_unstructured/cleaners/test_core.py
+++ b/test_unstructured/cleaners/test_core.py
@@ -240,3 +240,8 @@ def test_clean(text, extra_whitespace, dashes, bullets, lowercase, trailing_punc
         )
         == expected
     )
+
+
+def test_bytes_string_to_string():
+    raw = "\xe6\xaf\x8f\xe6\x97\xa5\xe6\x96\xb0\xe9\x97\xbb"  # UTF-8 bytes of "每日新闻"
+    assert core.bytes_string_to_string(text=raw, encoding="utf-8") == "每日新闻"
diff --git a/test_unstructured/partition/test_html_partition.py b/test_unstructured/partition/test_html_partition.py
index 4fa6e3ace..51fab4ef2 100644
--- a/test_unstructured/partition/test_html_partition.py
+++ b/test_unstructured/partition/test_html_partition.py
@@ -6,7 +6,7 @@ import pytest
 import requests
 from requests.models import Response
 
-from unstructured.documents.elements import PageBreak
+from unstructured.documents.elements import PageBreak, Title
 from unstructured.partition.html import partition_html
 
 DIRECTORY = pathlib.Path(__file__).parent.resolve()
@@ -155,3 +155,9 @@ def test_partition_html_processes_chinese_chracters():
     html_text = "<html><div><p>每日新闻</p></div></html>"
     elements = partition_html(text=html_text)
     assert elements[0].text == "每日新闻"
+
+
+def test_emoji_appears_with_emoji_utf8_code():
+    markup = """\n<html charset="utf-8"><p>Hello &#128512;</p></html>"""  # &#128512; is U+1F600
+    parsed = partition_html(text=markup)
+    assert parsed[0] == Title("Hello 😀")
diff --git a/unstructured/__version__.py b/unstructured/__version__.py
index 20c9e2db5..cf303ac8f 100644
--- a/unstructured/__version__.py
+++ b/unstructured/__version__.py
@@ -1 +1 @@
-__version__ = "0.5.13-dev4"  # pragma: no cover
+__version__ = "0.5.13-dev5"  # pragma: no cover
diff --git a/unstructured/cleaners/core.py b/unstructured/cleaners/core.py
index e9cb8a1d5..d034df082 100644
--- a/unstructured/cleaners/core.py
+++ b/unstructured/cleaners/core.py
@@ -238,3 +238,10 @@ def clean(
     cleaned_text = clean_extra_whitespace(cleaned_text) if extra_whitespace else cleaned_text
     cleaned_text = clean_bullets(cleaned_text) if bullets else cleaned_text
     return cleaned_text.strip()
+
+
+def bytes_string_to_string(text: str, encoding: str = "utf-8") -> str:
+    """Decodes a str whose characters each hold one raw byte (code points 0-255) with the given
+    encoding. Raises UnicodeError (a ValueError) on code points above 255 or invalid bytes."""
+    text_bytes = text.encode("latin-1")  # latin-1 maps code point N to byte N (0-255 only)
+    return text_bytes.decode(encoding)