mirror of
				https://github.com/langgenius/dify.git
				synced 2025-11-04 04:43:09 +00:00 
			
		
		
		
	fix: split text keep separator (#7930)
This commit is contained in:
		
							parent
							
								
									7b2cf8215f
								
							
						
					
					
						commit
						571415d1a4
					
				@ -30,15 +30,14 @@ def _split_text_with_regex(
 | 
			
		||||
        if keep_separator:
 | 
			
		||||
            # The parentheses in the pattern keep the delimiters in the result.
 | 
			
		||||
            _splits = re.split(f"({re.escape(separator)})", text)
 | 
			
		||||
            splits = [_splits[i] + _splits[i + 1] for i in range(1, len(_splits), 2)]
 | 
			
		||||
            if len(_splits) % 2 == 0:
 | 
			
		||||
            splits = [_splits[i - 1] + _splits[i] for i in range(1, len(_splits), 2)]
 | 
			
		||||
            if len(_splits) % 2 != 0:
 | 
			
		||||
                splits += _splits[-1:]
 | 
			
		||||
            splits = [_splits[0]] + splits
 | 
			
		||||
        else:
 | 
			
		||||
            splits = re.split(separator, text)
 | 
			
		||||
    else:
 | 
			
		||||
        splits = list(text)
 | 
			
		||||
    return [s for s in splits if s != ""]
 | 
			
		||||
    return [s for s in splits if (s != "" and s != '\n')]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TextSplitter(BaseDocumentTransformer, ABC):
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user