mirror of
				https://github.com/Unstructured-IO/unstructured.git
				synced 2025-10-31 10:03:07 +00:00 
			
		
		
		
	 27fa2a39d8
			
		
	
	
		27fa2a39d8
		
			
		
	
	
	
	
		
			
			Small pr adding tests describing the behavior of `set_element_hierarchy`. No tests were changed, just added.
		
			
				
	
	
		
			486 lines
		
	
	
		
			20 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			486 lines
		
	
	
		
			20 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """Test-suite for `unstructured.partition.common.metadata` module."""
 | |
| 
 | |
| # pyright: reportPrivateUsage=false
 | |
| 
 | |
| from __future__ import annotations
 | |
| 
 | |
| import copy
 | |
| import datetime as dt
 | |
| import os
 | |
| import pathlib
 | |
| from typing import Any, Callable
 | |
| 
 | |
| import pytest
 | |
| 
 | |
| from unstructured.documents.elements import (
 | |
|     CheckBox,
 | |
|     Element,
 | |
|     ElementMetadata,
 | |
|     FigureCaption,
 | |
|     Header,
 | |
|     ListItem,
 | |
|     NarrativeText,
 | |
|     Text,
 | |
|     Title,
 | |
| )
 | |
| from unstructured.file_utils.model import FileType
 | |
| from unstructured.partition.common.metadata import (
 | |
|     _assign_hash_ids,
 | |
|     apply_metadata,
 | |
|     get_last_modified_date,
 | |
|     set_element_hierarchy,
 | |
| )
 | |
| 
 | |
| # ================================================================================================
 | |
| # LAST-MODIFIED
 | |
| # ================================================================================================
 | |
| 
 | |
| 
 | |
class Describe_get_last_modified_date:
    """Unit-test suite for `unstructured.partition.common.metadata.get_last_modified_date()`."""

    def it_gets_the_modified_time_of_a_file_identified_by_a_path(self, tmp_path: pathlib.Path):
        """The modification time of an existing file comes back as `YYYY-MM-DDTHH:MM:SS`."""
        target = tmp_path / "some_file.txt"
        target.write_text("abcdefg")
        # -- pin both access-time and modified-time of the file to a known instant --
        mtime = dt.datetime(2024, 3, 5, 17, 43, 40).timestamp()
        os.utime(target, (mtime, mtime))

        assert get_last_modified_date(str(target)) == "2024-03-05T17:43:40"

    def but_it_returns_None_when_there_is_no_file_at_that_path(self, tmp_path: pathlib.Path):
        """A path at which no file was ever created produces `None`."""
        missing = tmp_path / "some_file_that_does_not_exist.txt"

        assert get_last_modified_date(str(missing)) is None
 | |
| 
 | |
| 
 | |
| # ================================================================================================
 | |
| # ELEMENT HIERARCHY
 | |
| # ================================================================================================
 | |
| 
 | |
| 
 | |
class Describe_set_element_hierarchy:
    """Unit-test suite for `unstructured.partition.common.metadata.set_element_hierarchy()`."""

    def it_applies_default_ruleset(self):
        """Without a `ruleset` argument, Text nests under Title and both nest under Header."""
        elements = [
            Title(element_id="0", text="Title0"),
            Text(element_id="1", text="Text0"),
            Header(element_id="2", text="Header0"),
            Text(element_id="3", text="Text1"),
            Title(element_id="4", text="Title1"),
            Text(element_id="5", text="Text2"),
        ]

        result = set_element_hierarchy(elements)

        assert result[0].metadata.parent_id is None
        assert result[1].metadata.parent_id == "0"  # Text0 is under Title0
        assert result[2].metadata.parent_id is None  # Header0 is higher than Title0
        assert result[3].metadata.parent_id == "2"  # Text1 is under Header0
        assert result[4].metadata.parent_id == "2"  # Title1 is under Header0
        assert result[5].metadata.parent_id == "4"  # Text2 is under Title1, which is under Header0

    def it_applies_category_depth_when_element_category_is_the_same(self):
        """Among ListItems, one with a deeper `category_depth` nests under a shallower one."""
        elements = [
            Title(element_id="0", text="Title0", metadata=ElementMetadata(category_depth=1)),
            ListItem(element_id="1", text="ListItem0", metadata=ElementMetadata(category_depth=0)),
            ListItem(element_id="2", text="ListItem1", metadata=ElementMetadata(category_depth=1)),
            ListItem(element_id="3", text="ListItem2", metadata=ElementMetadata(category_depth=0)),
        ]

        result = set_element_hierarchy(elements)

        assert result[0].metadata.parent_id is None
        assert result[1].metadata.parent_id == "0"  # category_depth=0
        assert result[2].metadata.parent_id == "1"  # category_depth=1, so it is under ListItem0
        assert result[3].metadata.parent_id == "0"  # category_depth=0

    def but_it_ignores_category_depth_when_elements_are_of_different_categories(self):
        """`category_depth` values do not affect nesting between different element categories."""
        elements = [
            Title(element_id="0", text="Title", metadata=ElementMetadata(category_depth=2)),
            Text(element_id="1", text="Text", metadata=ElementMetadata(category_depth=0)),
            Header(element_id="2", text="Header", metadata=ElementMetadata(category_depth=2)),
            Text(element_id="3", text="Text", metadata=ElementMetadata(category_depth=0)),
            ListItem(element_id="4", text="ListItem", metadata=ElementMetadata(category_depth=1)),
            NarrativeText(element_id="5", text="", metadata=ElementMetadata(category_depth=0)),
        ]

        result = set_element_hierarchy(elements)

        assert result[0].metadata.parent_id is None
        assert result[1].metadata.parent_id == "0"  # Text is under Title despite category_depth=0
        assert result[2].metadata.parent_id is None
        assert result[3].metadata.parent_id == "2"  # These are under Header despite category_depth
        assert result[4].metadata.parent_id == "2"
        assert result[5].metadata.parent_id == "2"

    def it_skips_elements_with_pre_existing_parent_id(self):
        """An element that already has a `parent_id` keeps it and is not used as a parent."""
        elements = [
            Title(element_id="0", text="Title", metadata=ElementMetadata(parent_id="10")),
            Title(element_id="1", text="Title"),
            Text(element_id="2", text="Text"),
        ]

        result = set_element_hierarchy(elements)

        # Parent ID should not change and element is skipped in figuring out other elements' parents
        assert result[0].metadata.parent_id == "10"
        assert result[1].metadata.parent_id is None
        assert result[2].metadata.parent_id == "1"

    def it_sets_parent_id_for_each_element_in_elements(self):
        """Parent ids are assigned across a longer sequence mixing several element types."""
        elements_to_set = [
            Title(text="Title"),  # 0
            NarrativeText(text="NarrativeText"),  # 1
            FigureCaption(text="FigureCaption"),  # 2
            ListItem(text="ListItem"),  # 3
            ListItem(text="ListItem", metadata=ElementMetadata(category_depth=1)),  # 4
            ListItem(text="ListItem", metadata=ElementMetadata(category_depth=1)),  # 5
            ListItem(text="ListItem"),  # 6
            CheckBox(element_id="some-id-1", checked=True),  # 7
            Title(text="Title 2"),  # 8
            ListItem(text="ListItem"),  # 9
            ListItem(text="ListItem"),  # 10
            Text(text="Text"),  # 11
        ]
        elements = set_element_hierarchy(elements_to_set)

        assert (
            elements[1].metadata.parent_id == elements[0].id
        ), "NarrativeText should be child of Title"
        assert (
            elements[2].metadata.parent_id == elements[0].id
        ), "FigureCaption should be child of Title"
        assert elements[3].metadata.parent_id == elements[0].id, "ListItem should be child of Title"
        # -- elements 4 and 5 have category_depth=1; their expected parent is the ListItem at
        # -- index 3, not the Title, so the failure message names the ListItem --
        assert (
            elements[4].metadata.parent_id == elements[3].id
        ), "nested ListItem should be child of first ListItem"
        assert (
            elements[5].metadata.parent_id == elements[3].id
        ), "nested ListItem should be child of first ListItem"
        assert elements[6].metadata.parent_id == elements[0].id, "ListItem should be child of Title"
        # NOTE(Hubert): moving the category field to Element, caused this to fail.
        # Checkboxes will soon be deprecated, then we can remove the test.
        # assert (
        #         elements[7].metadata.parent_id is None
        # ), "CheckBox should be None, as it's not a Text based element"
        assert elements[8].metadata.parent_id is None, "Title 2 should be child of None"
        assert (
            elements[9].metadata.parent_id == elements[8].id
        ), "ListItem should be child of Title 2"
        assert (
            elements[10].metadata.parent_id == elements[8].id
        ), "ListItem should be child of Title 2"
        assert elements[11].metadata.parent_id == elements[8].id, "Text should be child of Title 2"

    def it_applies_custom_rule_set(self):
        """A caller-supplied `ruleset` determines which categories nest under which."""
        elements_to_set = [
            Header(text="Header"),  # 0
            Title(text="Title"),  # 1
            NarrativeText(text="NarrativeText"),  # 2
            Text(text="Text"),  # 3
            Title(text="Title 2"),  # 4
            FigureCaption(text="FigureCaption"),  # 5
        ]

        custom_rule_set = {
            "Header": ["Title", "Text"],
            "Title": ["NarrativeText", "UncategorizedText", "FigureCaption"],
        }

        elements = set_element_hierarchy(
            elements=elements_to_set,
            ruleset=custom_rule_set,
        )

        assert elements[1].metadata.parent_id == elements[0].id, "Title should be child of Header"
        assert (
            elements[2].metadata.parent_id == elements[1].id
        ), "NarrativeText should be child of Title"
        assert elements[3].metadata.parent_id == elements[1].id, "Text should be child of Title"
        assert elements[4].metadata.parent_id == elements[0].id, "Title 2 should be child of Header"
        assert (
            elements[5].metadata.parent_id == elements[4].id
        ), "FigureCaption should be child of Title 2"
 | |
| 
 | |
| 
 | |
| # ================================================================================================
 | |
| # APPLY METADATA DECORATOR
 | |
| # ================================================================================================
 | |
| 
 | |
| 
 | |
class Describe_apply_metadata:
    """Behaviors of the `unstructured.partition.common.metadata.apply_metadata()` decorator."""

    # -- unique-ify elements and metadata ---------------------------------

    def it_produces_unique_elements_and_metadata_when_input_reuses_element_instances(self):
        """One element object returned three times comes back as three distinct objects."""
        shared_element = Text(
            text="Element", metadata=ElementMetadata(filename="foo.bar", page_number=1)
        )

        def fake_partitioner(**kwargs: Any) -> list[Element]:
            return [shared_element] * 3

        elements = apply_metadata()(fake_partitioner)()

        # -- no two elements are the same object --
        element_identities = {id(e) for e in elements}
        assert len(element_identities) == len(elements)
        # -- no two elements share a metadata object --
        metadata_identities = {id(e.metadata) for e in elements}
        assert len(metadata_identities) == len(elements)

    def and_it_produces_unique_elements_and_metadata_when_input_reuses_metadata_instances(self):
        """Distinct elements built on one metadata object each end up with their own metadata."""
        shared_metadata = ElementMetadata(filename="foo.bar", page_number=1)

        def fake_partitioner(**kwargs: Any) -> list[Element]:
            return [Text(text=text, metadata=shared_metadata) for text in ("foo", "bar", "baz")]

        elements = apply_metadata()(fake_partitioner)()

        # -- no two elements are the same object --
        element_identities = {id(e) for e in elements}
        assert len(element_identities) == len(elements)
        # -- no two elements share a metadata object --
        metadata_identities = {id(e.metadata) for e in elements}
        assert len(metadata_identities) == len(elements)

    # -- unique-ids -------------------------------------------------------

    def it_assigns_hash_element_ids_when_unique_ids_arg_is_not_specified(
        self, fake_partitioner: Callable[..., list[Element]]
    ):
        """Omitting `unique_element_ids` yields 32-character ids that repeat across calls."""
        decorated = apply_metadata()(fake_partitioner)

        first_run = decorated()
        second_run = decorated()

        # -- a hash id is 32 characters long and contains no hyphens --
        for element in first_run:
            assert len(element.id) == 32
            assert "-" not in element.id
        # -- the same ids are produced on every call --
        assert all(a.id == b.id for a, b in zip(first_run, second_run))

    def it_assigns_hash_element_ids_when_unique_ids_arg_is_False(
        self, fake_partitioner: Callable[..., list[Element]]
    ):
        """`unique_element_ids=False` yields 32-character ids that repeat across calls."""
        decorated = apply_metadata()(fake_partitioner)

        first_run = decorated(unique_element_ids=False)
        second_run = decorated(unique_element_ids=False)

        # -- a hash id is 32 characters long and contains no hyphens --
        for element in first_run:
            assert len(element.id) == 32
            assert "-" not in element.id
        # -- the same ids are produced on every call --
        assert all(a.id == b.id for a, b in zip(first_run, second_run))

    def it_leaves_UUID_element_ids_when_unique_ids_arg_is_True(
        self, fake_partitioner: Callable[..., list[Element]]
    ):
        """`unique_element_ids=True` yields 36-character ids that differ between calls."""
        decorated = apply_metadata()(fake_partitioner)

        first_run = decorated(unique_element_ids=True)
        second_run = decorated(unique_element_ids=True)

        # -- a UUID is 36 characters long, four of which are hyphens --
        for element in first_run:
            assert len(element.id) == 36
            assert element.id.count("-") == 4
        # -- a fresh id is produced on every call --
        assert all(a.id != b.id for a, b in zip(first_run, second_run))

    # -- parent-id --------------------------------------------------------

    def it_computes_and_assigns_parent_id(self, fake_partitioner: Callable[..., list[Element]]):
        """The narrative text is given the title's id as its `parent_id`."""
        decorated = apply_metadata()(fake_partitioner)

        title, narr_text = decorated()[:2]

        assert title.metadata.category_depth == 1
        assert narr_text.metadata.parent_id == title.id

    # -- languages --------------------------------------------------------

    def it_applies_language_metadata(self, fake_partitioner: Callable[..., list[Element]]):
        """With per-element auto-detection requested, every element is labelled `["eng"]`."""
        decorated = apply_metadata()(fake_partitioner)

        elements = decorated(languages=["auto"], detect_language_per_element=True)

        detected = [e.metadata.languages for e in elements]
        assert detected == [["eng"]] * len(elements)

    # -- filetype (MIME-type) ---------------------------------------------

    def it_assigns_the_value_of_a_metadata_file_type_arg_when_there_is_one(
        self, fake_partitioner: Callable[..., list[Element]]
    ):
        """A `metadata_file_type` call argument wins over the decorator's `file_type`.

        A delegating partitioner uses this to keep the original file-type in the metadata, for
        example EPUB rather than the HTML that the .epub file was converted to.
        """
        decorated = apply_metadata(file_type=FileType.DOCX)(fake_partitioner)

        elements = decorated(metadata_file_type=FileType.ODT)

        odt_mime_type = "application/vnd.oasis.opendocument.text"
        assert all(e.metadata.filetype == odt_mime_type for e in elements)

    def and_it_assigns_the_decorator_file_type_when_the_metadata_file_type_arg_is_omitted(
        self, fake_partitioner: Callable[..., list[Element]]
    ):
        """The decorator's `file_type=...` argument is the usual source of the file-type.

        Principal (non-delegating) partitioners rely on this.
        """
        decorated = apply_metadata(file_type=FileType.DOCX)(fake_partitioner)

        elements = decorated()

        docx_mime_type = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        assert all(e.metadata.filetype == docx_mime_type for e in elements)

    def and_it_does_not_assign_file_type_metadata_when_both_are_omitted(
        self, fake_partitioner: Callable[..., list[Element]]
    ):
        """The partitioner's own `.metadata.filetype` survives when no file-type is given.

        `partition_image()` is an example of a partitioner that sets it itself, because it serves
        more than one file-type.
        """
        decorated = apply_metadata()(fake_partitioner)

        elements = decorated()

        filetypes = [e.metadata.filetype for e in elements]
        assert filetypes == ["image/jpeg"] * len(elements)

    # -- filename ---------------------------------------------------------

    def it_uses_metadata_filename_arg_value_when_present(
        self, fake_partitioner: Callable[..., list[Element]]
    ):
        """The `metadata_filename` argument takes precedence over every other source."""
        decorated = apply_metadata()(fake_partitioner)

        elements = decorated(metadata_filename="a/b/c.xyz")

        # -- the path is split into its file-name and directory parts --
        assert all(e.metadata.filename == "c.xyz" for e in elements)
        assert all(e.metadata.file_directory == "a/b" for e in elements)

    def and_it_uses_filename_arg_value_when_metadata_filename_arg_not_present(
        self, fake_partitioner: Callable[..., list[Element]]
    ):
        """Without `metadata_filename`, the `filename` argument supplies name and directory."""
        decorated = apply_metadata()(fake_partitioner)

        elements = decorated(filename="a/b/c.xyz")

        # -- the path is split into its file-name and directory parts --
        assert all(e.metadata.filename == "c.xyz" for e in elements)
        assert all(e.metadata.file_directory == "a/b" for e in elements)

    def and_it_does_not_assign_filename_metadata_when_neither_are_present(
        self, fake_partitioner: Callable[..., list[Element]]
    ):
        """With neither argument given, the partitioner's own filename metadata is kept."""
        decorated = apply_metadata()(fake_partitioner)

        elements = decorated()

        assert all(e.metadata.filename == "image.jpeg" for e in elements)
        assert all(e.metadata.file_directory == "x/y/images" for e in elements)

    # -- last_modified ----------------------------------------------------

    def it_uses_metadata_last_modified_arg_value_when_present(
        self, fake_partitioner: Callable[..., list[Element]]
    ):
        """The `metadata_last_modified` argument takes precedence over every other source."""
        expected = "2024-09-26T15:17:53"
        decorated = apply_metadata()(fake_partitioner)

        elements = decorated(metadata_last_modified=expected)

        assert all(e.metadata.last_modified == expected for e in elements)

    @pytest.mark.parametrize("kwargs", [{}, {"metadata_last_modified": None}])
    def but_it_does_not_update_last_modified_when_metadata_last_modified_arg_absent_or_None(
        self, kwargs: dict[str, Any], fake_partitioner: Callable[..., list[Element]]
    ):
        """A missing or `None` argument leaves the partitioner's own `last_modified` in place."""
        decorated = apply_metadata()(fake_partitioner)

        elements = decorated(**kwargs)

        timestamps = [e.metadata.last_modified for e in elements]
        assert timestamps == ["2020-01-06T05:07:03"] * len(elements)

    # -- url --------------------------------------------------------------

    def it_assigns_url_metadata_field_when_url_arg_is_present(
        self, fake_partitioner: Callable[..., list[Element]]
    ):
        """A `url` argument is written to `.metadata.url` of every element."""
        url = "https://adobe.com/stock/54321"
        decorated = apply_metadata()(fake_partitioner)

        elements = decorated(url=url)

        assert all(e.metadata.url == url for e in elements)

    def and_it_does_not_assign_url_metadata_when_url_arg_is_not_present(
        self, fake_partitioner: Callable[..., list[Element]]
    ):
        """Without a `url` argument, the partitioner's own `.metadata.url` is kept."""
        decorated = apply_metadata()(fake_partitioner)

        elements = decorated()

        urls = [e.metadata.url for e in elements]
        assert urls == ["http://images.com"] * len(elements)

    # -- fixtures --------------------------------------------------------------------------------

    @pytest.fixture
    def fake_partitioner(self) -> Callable[..., list[Element]]:
        """Stand-in partitioner producing a title and a paragraph with pre-set metadata."""

        def fake_partitioner(**kwargs: Any) -> list[Element]:
            title = Title("Introduction")
            title.metadata.category_depth = 1
            narr_text = NarrativeText("To understand bar you must first understand foo.")

            # -- both elements carry the same file-level metadata values --
            for element in (title, narr_text):
                metadata = element.metadata
                metadata.file_directory = "x/y/images"
                metadata.filename = "image.jpeg"
                metadata.filetype = "image/jpeg"
                metadata.last_modified = "2020-01-06T05:07:03"
                metadata.url = "http://images.com"

            return [title, narr_text]

        return fake_partitioner
 | |
| 
 | |
| 
 | |
| # ================================================================================================
 | |
| # HASH IDS
 | |
| # ================================================================================================
 | |
| 
 | |
| 
 | |
def test_assign_hash_ids_produces_unique_and_deterministic_SHA1_ids_even_for_duplicate_elements():
    """`_assign_hash_ids()` gives identical elements distinct, reproducible 32-character ids."""
    originals: list[Element] = [
        Text(text="Element", metadata=ElementMetadata(filename="foo.bar", page_number=1))
        for _ in range(3)
    ]
    # -- default ids are UUIDs --
    assert all(len(e.id) == 36 for e in originals)

    # -- hash two independent copies of the SAME input, so the determinism check below compares
    # -- like with like rather than comparing a first hashing with a hashing of its own output --
    elements = _assign_hash_ids(copy.deepcopy(originals))
    elements_2 = _assign_hash_ids(copy.deepcopy(originals))
    # -- separately, hash a copy of the already-hashed elements --
    rehashed = _assign_hash_ids(copy.deepcopy(elements))

    ids = [e.id for e in elements]
    # -- ids are now 32-character hashes --
    assert all(len(element_id) == 32 for element_id in ids)
    # -- each id is unique --
    assert len(ids) == len(set(ids))
    # -- ids are deterministic, same value is computed each time from the same input --
    assert ids == [e.id for e in elements_2]
    # -- and hashing already-hashed elements does not change their ids --
    assert ids == [e.id for e in rehashed]
 |