Upgrade transformers version to 4.18.0 (#2514)

* Upgrade transformers version to 4.18.0

* Adapt tokenization test to upgrade

* Adapt tokenization test to upgrade
This commit is contained in:
bogdankostic 2022-05-06 16:57:13 +02:00 committed by GitHub
parent caf1336424
commit bce84577c6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 2 additions and 4 deletions

View File

@@ -56,7 +56,7 @@ install_requires =
torch>1.9,<1.11
requests
pydantic
-transformers==4.13.0
+transformers==4.18.0
nltk
pandas

View File

@@ -266,7 +266,7 @@ def test_all_tokenizer_on_special_cases(caplog):
"This is a sentence with multiple tabs",
]
-expected_to_fail = [(1, 1), (1, 3), (1, 4), (1, 5), (1, 6), (1, 7), (2, 1), (2, 5)]
+expected_to_fail = {(2, 1), (2, 5)}
for i_tok, tokenizer in enumerate(tokenizers):
for i_text, text in enumerate(texts):
@@ -299,8 +299,6 @@ def test_all_tokenizer_on_special_cases(caplog):
for ((start, end), w_index) in zip(encoded.offsets, encoded.words):
word_start_ch = word_spans[w_index][0]
token_offsets.append((start + word_start_ch, end + word_start_ch))
-if getattr(tokenizer, "add_prefix_space", None):
-token_offsets = [(start - 1, end) for start, end in token_offsets]
# verify that offsets align back to original text
if text == "力加勝北区ᴵᴺᵀᵃছজটডণত":