enhancement: Speed up function _assign_hash_ids by 34% (#4101)

In-repo duplicate of #4089.

---------

Co-authored-by: codeflash-ai[bot] <148906541+codeflash-ai[bot]@users.noreply.github.com>
Co-authored-by: Aseem Saxena <aseem.bits@gmail.com>
This commit is contained in:
qued 2025-09-25 15:49:15 -05:00 committed by GitHub
parent 2d44d73a88
commit ef68384985
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 17 additions and 12 deletions

View File

@ -1,3 +1,12 @@
## 0.18.16-dev0
### Enhancements
- Speed up function _assign_hash_ids by 34% (codeflash)
### Features
### Fixes
## 0.18.15
### Enhancements

View File

@ -360,7 +360,7 @@
"eng"
],
"page_number": 1,
"orig_elements": "eJztUsmO2zAM/RVB58ZLYidxjz3NaVCg6WkwMGiLsoVYCyQ6TRHk30u1GWBaoH8wN1Hk01vEl5vEBS066o2Sn4VEPNTVfoBKbZvq0FVNpSuNXdvVGnYwNPKTkBYJFBDw/E3mQ5/8Gkf8XQeM1qRkvEv9Y+jlJq1Xub3b1cfj/ZXfiDj6qPrFj0A+/kECzVlCOXuLZVydw1j+8PFcri5RXEdaI6q/C7yCDQtulB9TOYNTg/fnTR0KvrjK+52ZtFmQfoZMLyGExTAjqysvThU+oLvaRftogdLGa21GZOiaEymYW4XoR2Q7brJL8dbJISzgphUmTNmgRDfJbCvwTe9WO2D2VGd+witl7tNsknh6SBTKYxLOkxgjAqEApUyWBYuIZpopCZjAsFVBM4pEjGMNgp0nQhQ+iu/OECrxjRiexOnRME5ouHDba+EZGkWASAZTkUW/5fAMMXIKFzxlcazy3zXoukrtj1V7GLb7o1aNbrbNoBpox6FtVbf7WIP/r8H7T38f+lce+8K/fZb3118ujDSv"
"orig_elements": "eJztUsmO2zAM/RVB58bL2Nl67KmnQYGmp8HAoCXKFmItkOg0RZB/L9VmgGmB/kFvosint4gvN4kLOvQ0WC0/Com4b5vdCI1+6pv9sekb0xg8bo+tgQ7GXn4Q0iGBBgKev8lyGHJYk8JfdcTkbM42+Dw8hl5u0gVd2l3XHg73V34joQpJD0tQQCH9RgLNRUI9B4d1Wr3HVH8P6VyvPlNaFa0J9Z8FXsHFBTc6qFzP4PUYwnnTxoovrvJ+ZyZjF6QfsdBLiHGxzMjq6ovXVYjor24xITmgvAnGWIUMXUsiFXPrmIJCtuMnt1RvnRLCAn5aYcJcDEr0kyy2It8MfnUjFk9t4Se8UuE+zTaLzw+JQgfMwgcSKiEQCtDaFlmwiGSnmbKACSxbFTSjyMQ41iDYeSZEEZL45i2hFl+J4VmcHg3rhYELt4MRgaFJREhkMVdF9FsOz5ASp3DBUxHHKv9eg2402wa2DW47rXBU4/6wM3vTd7tDt+8B/q/Bv9fg/ae/D/0Lj33i3z7L++tP43k1Mw=="
}
},
{
@ -760,7 +760,7 @@
"languages": [
"eng"
],
"orig_elements": "eJxVUEtuwyAQvUrEurbjpFGcbCv1ElFkjWFsowCDBmgdRb57IW0WXcEw78e7PAQatOhir5U4b8S7OnXbA5yOshskSnnctcOhha4dcbcd9kq8bYTFCAoiZPxDlEsfKLHE5+yRrQ5Bkwv9H+jyEJZUWe/3bdet16zBKIlVb0hCJP5lQpxLhGYmiw0n55Cbb+Jbk1yInGRMjOr/gAtYb7BSJEMzg1MD0a1qfZ0fFrGu2WnUBuPdF3sB3hudHXO65supmjy6xZqR2EIMFY2jlpipqTRSZ2/lmSTm77jJmvq1KSXMCAq5H4liPl4GnrUFvheAATclmDCUBgS6SVxLnIhLLMgP8nfW0/wUe9E/n2pivf4An2aN4g=="
"orig_elements": "eJxVUNFuwyAM/JWJ5zVZ1C6L9jppP1FVkQMmQQWMDGypqvz7IFsf9gS2z3fnO98FWnTo02iUeH8S0Es1dKfj6bV/64cJ9HQaZD9JOb1MEjstnp+EwwQKEhT8XdTPGCmzxL0OyM7EaMjH8Q90vgtHqo6Px24YtkvhYJTEarQkIRH/bkJaqoV2IYctZ++R22/ia5t9TJxlyozqf4EruGDxoEjGdgGvJqLroQtNaaxi24qSNhbTLeB+XQjWFMXirv3yqqGAfnVWEztI8UBaG4llNddEmqKtApPEco6fnW0ekxrCgqCQR02UyvMQCGwc8K0CLPg5w4yxJiDQz+JS7SRcU0V+ULixmZed7LH+ubOJ7fIDJz+OQw=="
}
}
]

View File

@ -1 +1 @@
__version__ = "0.18.15" # pragma: no cover
__version__ = "0.18.16-dev0" # pragma: no cover

View File

@ -5,7 +5,6 @@ from __future__ import annotations
import copy
import datetime as dt
import functools
import itertools
import os
from typing import Any, Callable, Iterator, Sequence
@ -252,15 +251,12 @@ def _assign_hash_ids(elements: list[Element]) -> list[Element]:
or more fragments for parallel processing.
"""
# -- generate sequence number for each element on a page --
page_numbers = [e.metadata.page_number for e in elements]
page_seq_numbers = [
seq_on_page
for _, group in itertools.groupby(page_numbers)
for seq_on_page, _ in enumerate(group)
]
for element, seq_on_page_counter in zip(elements, page_seq_numbers):
page_seq_counts = {}
for element in elements:
page_number = element.metadata.page_number
seq_on_page_counter = page_seq_counts.get(page_number, 0)
element.id_to_hash(seq_on_page_counter)
page_seq_counts[page_number] = seq_on_page_counter + 1
return elements