mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-10-20 04:18:57 +00:00
Fix nltk download in preprocessor (#852)
This commit is contained in:
parent
e641bff7a6
commit
ba91a90dd6
@ -44,7 +44,11 @@ class PreProcessor(BasePreProcessor):
|
|||||||
to True, the individual split will always have complete sentences &
|
to True, the individual split will always have complete sentences &
|
||||||
the number of words will be <= split_length.
|
the number of words will be <= split_length.
|
||||||
"""
|
"""
|
||||||
nltk.download("punkt")
|
try:
|
||||||
|
nltk.data.find('tokenizers/punkt')
|
||||||
|
except LookupError:
|
||||||
|
nltk.download('punkt')
|
||||||
|
|
||||||
self.clean_whitespace = clean_whitespace
|
self.clean_whitespace = clean_whitespace
|
||||||
self.clean_header_footer = clean_header_footer
|
self.clean_header_footer = clean_header_footer
|
||||||
self.clean_empty_lines = clean_empty_lines
|
self.clean_empty_lines = clean_empty_lines
|
||||||
|
Loading…
x
Reference in New Issue
Block a user