diff --git a/test_unstructured/partition/docx/test_docx.py b/test_unstructured/partition/docx/test_docx.py
index 6d6636f23..ca915d500 100644
--- a/test_unstructured/partition/docx/test_docx.py
+++ b/test_unstructured/partition/docx/test_docx.py
@@ -1,6 +1,7 @@
# pyright: reportPrivateUsage=false
import pathlib
+import re
from tempfile import SpooledTemporaryFile
from typing import Dict, List, cast
@@ -63,27 +64,28 @@ class Describe_DocxPartitioner:
"""
table = docx.Document(example_doc_path("docx-tables.docx")).tables[1]
- html = _DocxPartitioner()._convert_table_to_html(table)
+ # -- re.sub() strips out the extra padding inserted by tabulate --
+ html = re.sub(r" +<", "<", _DocxPartitioner()._convert_table_to_html(table))
expected_lines = [
"
",
"",
- f"| a | >b<{' ' * 96} | c |
",
+ "| a | >b< | c |
",
"",
"",
- "| d | ",
+ "| d | ",
"",
- "| e | f | ",
+ "| e | f | ",
"| g&t | h | ",
"",
- " | i | ",
- f"| j | k{' ' * 104} | l | ",
+ " | i |
",
+ "| j | k | l |
",
"",
"
",
]
actual_lines = html.splitlines()
for expected, actual in zip(expected_lines, actual_lines):
- assert actual == expected
+ assert actual == expected, f"\nexpected: {repr(expected)}\nactual: {repr(actual)}"
def it_can_convert_a_table_to_plain_text(self):
table = docx.Document(example_doc_path("docx-tables.docx")).tables[0]