mirror of
				https://github.com/datahub-project/datahub.git
				synced 2025-11-03 20:27:50 +00:00 
			
		
		
		
	fix(ingest): improve performance of get_allowed_list in AllowDenyPattern when dealing with large lists (#10219)
This commit is contained in:
		
							parent
							
								
									f36a597b17
								
							
						
					
					
						commit
						9eb6b2d68d
					
				@ -243,15 +243,21 @@ class AllowDenyPattern(ConfigModel):
 | 
			
		||||
        return AllowDenyPattern()
 | 
			
		||||
 | 
			
		||||
    def allowed(self, string: str) -> bool:
 | 
			
		||||
        for deny_pattern in self.deny:
 | 
			
		||||
            if re.match(deny_pattern, string, self.regex_flags):
 | 
			
		||||
                return False
 | 
			
		||||
        if self._denied(string):
 | 
			
		||||
            return False
 | 
			
		||||
 | 
			
		||||
        return any(
 | 
			
		||||
            re.match(allow_pattern, string, self.regex_flags)
 | 
			
		||||
            for allow_pattern in self.allow
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    def _denied(self, string: str) -> bool:
 | 
			
		||||
        for deny_pattern in self.deny:
 | 
			
		||||
            if re.match(deny_pattern, string, self.regex_flags):
 | 
			
		||||
                return True
 | 
			
		||||
 | 
			
		||||
        return False
 | 
			
		||||
 | 
			
		||||
    def is_fully_specified_allow_list(self) -> bool:
 | 
			
		||||
        """
 | 
			
		||||
        If the allow patterns are literals and not full regexes, then it is considered
 | 
			
		||||
@ -265,8 +271,11 @@ class AllowDenyPattern(ConfigModel):
 | 
			
		||||
 | 
			
		||||
    def get_allowed_list(self) -> List[str]:
 | 
			
		||||
        """Return the list of allowed strings as a list, after taking into account deny patterns, if possible"""
 | 
			
		||||
        assert self.is_fully_specified_allow_list()
 | 
			
		||||
        return [a for a in self.allow if self.allowed(a)]
 | 
			
		||||
        if not self.is_fully_specified_allow_list():
 | 
			
		||||
            raise ValueError(
 | 
			
		||||
                "allow list must be fully specified to get list of allowed strings"
 | 
			
		||||
            )
 | 
			
		||||
        return [a for a in self.allow if not self._denied(a)]
 | 
			
		||||
 | 
			
		||||
    def __eq__(self, other):  # type: ignore
 | 
			
		||||
        return isinstance(other, self.__class__) and self.__dict__ == other.__dict__
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user