mirror of
				https://github.com/langgenius/dify.git
				synced 2025-11-04 04:43:09 +00:00 
			
		
		
		
	fix: split text keep separator (#7930)
This commit is contained in:
		
							parent
							
								
									7b2cf8215f
								
							
						
					
					
						commit
						571415d1a4
					
				@ -30,15 +30,14 @@ def _split_text_with_regex(
 | 
				
			|||||||
        if keep_separator:
 | 
					        if keep_separator:
 | 
				
			||||||
            # The parentheses in the pattern keep the delimiters in the result.
 | 
					            # The parentheses in the pattern keep the delimiters in the result.
 | 
				
			||||||
            _splits = re.split(f"({re.escape(separator)})", text)
 | 
					            _splits = re.split(f"({re.escape(separator)})", text)
 | 
				
			||||||
            splits = [_splits[i] + _splits[i + 1] for i in range(1, len(_splits), 2)]
 | 
					            splits = [_splits[i - 1] + _splits[i] for i in range(1, len(_splits), 2)]
 | 
				
			||||||
            if len(_splits) % 2 == 0:
 | 
					            if len(_splits) % 2 != 0:
 | 
				
			||||||
                splits += _splits[-1:]
 | 
					                splits += _splits[-1:]
 | 
				
			||||||
            splits = [_splits[0]] + splits
 | 
					 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            splits = re.split(separator, text)
 | 
					            splits = re.split(separator, text)
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        splits = list(text)
 | 
					        splits = list(text)
 | 
				
			||||||
    return [s for s in splits if s != ""]
 | 
					    return [s for s in splits if (s != "" and s != '\n')]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class TextSplitter(BaseDocumentTransformer, ABC):
 | 
					class TextSplitter(BaseDocumentTransformer, ABC):
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user