fix(ingest): improve performance of get_allowed_list in AllowDenyPattern when dealing with large lists (#10219)

This commit is contained in:
Felix Lüdin 2024-04-16 21:48:48 +02:00 committed by GitHub
parent f36a597b17
commit 9eb6b2d68d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -243,15 +243,21 @@ class AllowDenyPattern(ConfigModel):
return AllowDenyPattern() return AllowDenyPattern()
def allowed(self, string: str) -> bool: def allowed(self, string: str) -> bool:
for deny_pattern in self.deny: if self._denied(string):
if re.match(deny_pattern, string, self.regex_flags): return False
return False
return any( return any(
re.match(allow_pattern, string, self.regex_flags) re.match(allow_pattern, string, self.regex_flags)
for allow_pattern in self.allow for allow_pattern in self.allow
) )
def _denied(self, string: str) -> bool:
for deny_pattern in self.deny:
if re.match(deny_pattern, string, self.regex_flags):
return True
return False
def is_fully_specified_allow_list(self) -> bool: def is_fully_specified_allow_list(self) -> bool:
""" """
If the allow patterns are literals and not full regexes, then it is considered If the allow patterns are literals and not full regexes, then it is considered
@ -265,8 +271,11 @@ class AllowDenyPattern(ConfigModel):
def get_allowed_list(self) -> List[str]: def get_allowed_list(self) -> List[str]:
"""Return the list of allowed strings as a list, after taking into account deny patterns, if possible""" """Return the list of allowed strings as a list, after taking into account deny patterns, if possible"""
assert self.is_fully_specified_allow_list() if not self.is_fully_specified_allow_list():
return [a for a in self.allow if self.allowed(a)] raise ValueError(
"allow list must be fully specified to get list of allowed strings"
)
return [a for a in self.allow if not self._denied(a)]
def __eq__(self, other): # type: ignore def __eq__(self, other): # type: ignore
return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ return isinstance(other, self.__class__) and self.__dict__ == other.__dict__