mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-09-25 08:04:49 +00:00
Upgrade transformers version to 4.18.0 (#2514)
* Upgrade transformers version to 4.18.0 * Adapt tokenization test to upgrade * Adapt tokenization test to upgrade
This commit is contained in:
parent
caf1336424
commit
bce84577c6
@@ -56,7 +56,7 @@ install_requires =
     torch>1.9,<1.11
     requests
     pydantic
-    transformers==4.13.0
+    transformers==4.18.0
     nltk
     pandas
@@ -266,7 +266,7 @@ def test_all_tokenizer_on_special_cases(caplog):
         "This is a sentence with multiple tabs",
     ]

-    expected_to_fail = [(1, 1), (1, 3), (1, 4), (1, 5), (1, 6), (1, 7), (2, 1), (2, 5)]
+    expected_to_fail = {(2, 1), (2, 5)}

     for i_tok, tokenizer in enumerate(tokenizers):
         for i_text, text in enumerate(texts):
@@ -299,8 +299,6 @@ def test_all_tokenizer_on_special_cases(caplog):
             for ((start, end), w_index) in zip(encoded.offsets, encoded.words):
                 word_start_ch = word_spans[w_index][0]
                 token_offsets.append((start + word_start_ch, end + word_start_ch))
-            if getattr(tokenizer, "add_prefix_space", None):
-                token_offsets = [(start - 1, end) for start, end in token_offsets]

             # verify that offsets align back to original text
             if text == "力加勝北区ᴵᴺᵀᵃছজটডণত":
Loading…
x
Reference in New Issue
Block a user