fix: handling for empty tables in word docs and powerpoints (#982)

* fix table index error

* changelog and version
This commit is contained in:
Matt Robinson 2023-07-27 11:07:27 -04:00 committed by GitHub
parent df1ba39905
commit 15618e8346
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 20 additions and 3 deletions

View File

@ -9,6 +9,8 @@
### Fixes
* Fix an index error raised when converting empty tables in Word documents and PowerPoint presentations.
## 0.8.4
### Enhancements

View File

@ -219,3 +219,14 @@ def test_convert_office_doc_captures_errors(monkeypatch, caplog):
monkeypatch.setattr(subprocess, "Popen", MockPopenWithError)
common.convert_office_doc("no-real.docx", "fake-directory", target_format="docx")
assert "an error occurred" in caplog.text
class MockDocxEmptyTable:
    """Minimal test double for a table object that contains no rows.

    Only the ``rows`` attribute is provided, and it is always an empty list.
    """

    def __init__(self):
        # An empty row list is the edge case the table conversion must handle.
        self.rows = []
def test_convert_ms_office_table_to_text_works_with_empty_tables():
    """An empty table converts to an empty string in both HTML and plain mode."""
    table = MockDocxEmptyTable()
    # Exercise both output formats: each must yield "" rather than raising.
    assert common.convert_ms_office_table_to_text(table, as_html=True) == ""
    assert common.convert_ms_office_table_to_text(table, as_html=False) == ""

View File

@ -318,6 +318,10 @@ def convert_ms_office_table_to_text(table: docxtable.Table, as_html: bool = True
"""
fmt = "html" if as_html else "plain"
rows = list(table.rows)
headers = [cell.text for cell in rows[0].cells]
data = [[cell.text for cell in row.cells] for row in rows[1:]]
return tabulate(data, headers=headers, tablefmt=fmt)
if len(rows) > 0:
headers = [cell.text for cell in rows[0].cells]
data = [[cell.text for cell in row.cells] for row in rows[1:]]
table_text = tabulate(data, headers=headers, tablefmt=fmt)
else:
table_text = ""
return table_text