mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-11-02 19:13:13 +00:00
Fix extracting value from field (#3774)
This commit is contained in:
parent
66d1e5a5cb
commit
c2d17b1ca4
@@ -1,4 +1,4 @@
-## 0.16.5-dev0
+## 0.16.5-dev1
 
 ### Enhancements
 
@@ -201,6 +201,10 @@ def test_parsed_ontology_can_be_serialized_from_html(html_file_path, json_file_p
 
     for i in range(len(expected_json_elements)):
         assert expected_json_elements[i] == predicted_elements[i]
+        assert (
+            expected_json_elements[i].metadata.text_as_html
+            == predicted_elements[i].metadata.text_as_html
+        )
 
 
 def test_inline_elements_are_squeezed():
@@ -607,6 +607,21 @@ def test_text_in_form_field_value():
     assert form_field_value.to_text() == "Random Input Value"
 
 
+def test_text_in_form_field_value_with_null_value():
+    # language=HTML
+    input_html = """
+    <div class="Page">
+        <input class="FormFieldValue" value=""/>
+    </div>
+    """
+    page = parse_html_to_ontology(input_html)
+
+    assert len(page.children) == 1
+    form_field_value = page.children[0]
+    assert form_field_value.text == ""
+    assert form_field_value.to_text() == ""
+
+
 def test_to_text_when_form_field():
     ontology = Page(
         children=[
@@ -1 +1 @@
-__version__ = "0.16.5-dev0"  # pragma: no cover
+__version__ = "0.16.5-dev1"  # pragma: no cover
@@ -93,7 +93,7 @@ class OntologyElement(BaseModel):
         if self.children and add_children:
             children_text = " ".join(child.to_text().strip() for child in self.children)
             return children_text
-        return BeautifulSoup(self.to_html()).get_text().strip()
+        return BeautifulSoup(self.to_html(), "html.parser").get_text().strip()
 
     def _construct_attribute_string(self, attributes: dict) -> str:
         return " ".join(
@@ -466,8 +466,11 @@ class FormFieldValue(OntologyElement):
     allowed_tags: List[str] = Field(["input"], frozen=True)
 
     def to_text(self, add_children=True) -> str:
-        text = super().to_text() + self.additional_attributes.get("value", "")
-        return text.strip()
+        text = super().to_text()
+        value = self.additional_attributes.get("value", "")
+        if not value:
+            return text
+        return f"{text} {value}".strip()
 
 
 class Checkbox(OntologyElement):
Loading…
x
Reference in New Issue
Block a user