mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-11-02 19:13:13 +00:00
fix: check for xml attribute when identifying pagebreaks (#778)
This commit is contained in:
parent
db4c5dfdf7
commit
feaf1cb4df
@ -1,4 +1,4 @@
|
||||
## 0.7.7-dev2
|
||||
## 0.7.7
|
||||
|
||||
### Enhancements
|
||||
|
||||
@ -12,6 +12,8 @@
|
||||
|
||||
### Fixes
|
||||
|
||||
* Check for the `xml` attribute on `element` before looking for pagebreaks in `partition_docx`.
|
||||
|
||||
## 0.7.6
|
||||
|
||||
### Enhancements
|
||||
|
||||
@ -1 +1 @@
|
||||
__version__ = "0.7.7-dev2" # pragma: no cover
|
||||
__version__ = "0.7.7" # pragma: no cover
|
||||
|
||||
@ -220,9 +220,10 @@ def _element_contains_pagebreak(element) -> bool:
|
||||
["w:br", 'type="page"'], # "Hard" page break inserted by user
|
||||
["lastRenderedPageBreak"], # "Soft" page break inserted by renderer
|
||||
]
|
||||
for indicators in page_break_indicators:
|
||||
if all(indicator in element.xml for indicator in indicators):
|
||||
return True
|
||||
if hasattr(element, "xml"):
|
||||
for indicators in page_break_indicators:
|
||||
if all(indicator in element.xml for indicator in indicators):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user