mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-12-24 21:55:33 +00:00
fix: link_texts was breaking postgres destination connector (#2460)
Formatting of link_texts was breaking metadata storage. Turns out it didn't need any conforming and came in correctly from json. --------- Co-authored-by: potter-potter <david.potter@gmail.com>
This commit is contained in:
parent
d5a6f4b82c
commit
74dcca44ca
@ -1,4 +1,4 @@
|
||||
## 0.12.3-dev6
|
||||
## 0.12.3-dev7
|
||||
|
||||
### Enhancements
|
||||
|
||||
@ -17,6 +17,7 @@
|
||||
* **Fix databricks-volumes extra location.** `setup.py` is currently pointing to the wrong location for the databricks-volumes extra requirements. This results in errors when trying to build the wheel for unstructured. This change updates to point to the correct path.
|
||||
* **Fix uploading None values to Chroma and Pinecone.** Removes keys with None values with Pinecone and Chroma destinations. Pins Pinecone dependency
|
||||
* **Update documentation.** (i) best practice for table extraction by using 'skip_infer_table_types' param, instead of 'pdf_infer_table_structure', and (ii) fixed CSS, RST issues and a typo in the documentation.
|
||||
* **Fix postgres storage of link_texts.** Formatting of link_texts was breaking metadata storage.
|
||||
|
||||
|
||||
## 0.12.2
|
||||
|
||||
@ -60,6 +60,23 @@ TEST_DATA_2 = {
|
||||
"embeddings": [0.1, 0.2, 0.3],
|
||||
}
|
||||
|
||||
# Fixture for test_conform_dict_link_texts: an element-style dict whose nested
# "metadata" carries list-valued "link_texts" and "link_urls" entries alongside
# "coordinates", "data_source", "last_modified" and "page_number".
TEST_DATA_3 = {
|
||||
"metadata": {
|
||||
"coordinates": {"points": [1, 2, 3]},
|
||||
"data_source": {
|
||||
"date_created": "2021-01-01T00:00:00",
|
||||
"date_modified": "2021-01-02T00:00:00",
|
||||
"date_processed": "2022-12-13T15:44:08",
|
||||
"version": 1.1,
|
||||
},
|
||||
"last_modified": "2021-01-03T00:00:00",
|
||||
"page_number": 10,
|
||||
"link_texts": ["Skip to main content"],
|
||||
"link_urls": ["#main-content"],
|
||||
},
|
||||
"embeddings": [0.1, 0.2, 0.3],
|
||||
}
|
||||
|
||||
|
||||
def test_conform_dict_1():
|
||||
"""Validate that the conform_dict method returns the expected output for a real example"""
|
||||
@ -125,3 +142,30 @@ def test_conform_dict_2():
|
||||
"version": "1.1",
|
||||
"points": "[1, 2, 3]",
|
||||
}
|
||||
|
||||
|
||||
def test_conform_dict_link_texts():
    """Validate that conform_dict leaves link_texts and link_urls as plain lists.

    conform_dict mutates the dict it is given, so the assertion is made on the
    mutated input rather than on a return value.
    """
    # Local import: the file's top-level import block is not visible from here.
    import copy

    # Create a mock instance of the connector class
    connector = SqlDestinationConnector(write_config=Mock(), connector_config=Mock())

    # Mock the uuid.uuid4 function to return a fixed value
    with patch("uuid.uuid4", return_value="mocked_uuid"):
        # Deep-copy the fixture: conform_dict rewrites nested metadata entries
        # in place, and a shallow .copy() would share those nested dicts with
        # the module-level TEST_DATA_3, leaking state into other tests.
        data_out = copy.deepcopy(TEST_DATA_3)
        connector.conform_dict(data_out)

    # Assert that the result matches the expected output
    assert data_out == {
        "embeddings": "[0.1, 0.2, 0.3]",
        "id": "mocked_uuid",
        "last_modified": datetime.datetime(2021, 1, 3, 0, 0),
        "link_texts": ["Skip to main content"],
        "link_urls": ["#main-content"],
        "page_number": "10",
        "date_created": datetime.datetime(2021, 1, 1, 0, 0),
        "date_modified": datetime.datetime(2021, 1, 2, 0, 0),
        "date_processed": datetime.datetime(2022, 12, 13, 15, 44, 8),
        "version": "1.1",
        "points": "[1, 2, 3]",
    }
|
||||
|
||||
@ -1 +1 @@
|
||||
__version__ = "0.12.3-dev6" # pragma: no cover
|
||||
__version__ = "0.12.3-dev7" # pragma: no cover
|
||||
|
||||
@ -127,9 +127,6 @@ class SqlDestinationConnector(BaseDestinationConnector):
|
||||
):
|
||||
data["metadata"]["data_source"]["permissions_data"] = json.dumps(permissions_data)
|
||||
|
||||
if link_texts := data.get("metadata", {}).get("link_texts", {}):
|
||||
data["metadata"]["link_texts"] = str(json.dumps(link_texts))
|
||||
|
||||
if sent_from := data.get("metadata", {}).get("sent_from", {}):
|
||||
data["metadata"]["sent_from"] = str(json.dumps(sent_from))
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user