upgrade transformers and reorganize extras (#7815)

This commit is contained in:
Stefano Fiorucci 2024-06-06 15:57:18 +02:00 committed by GitHub
parent 3c8569e12c
commit bde92fda67
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -100,23 +100,25 @@ format-check = "black --check ."
[tool.hatch.envs.test] [tool.hatch.envs.test]
extra-dependencies = [ extra-dependencies = [
"transformers[torch,sentencepiece]==4.38.2", # ExtractiveReader, TransformersSimilarityRanker, LocalWhisperTranscriber, HFGenerators... "transformers[torch,sentencepiece]==4.41.2", # ExtractiveReader, TransformersSimilarityRanker, LocalWhisperTranscriber, HFGenerators...
"huggingface_hub>=0.23.0", # TGI Generators and TEI Embedders "huggingface_hub>=0.23.0", # Hugging Face API Generators and Embedders
"spacy>=3.7,<3.8", # NamedEntityExtractor "sentence-transformers>=2.2.0", # SentenceTransformersTextEmbedder and SentenceTransformersDocumentEmbedder
"spacy-curated-transformers>=0.2,<=0.3", # NamedEntityExtractor "langdetect", # TextLanguageRouter and DocumentLanguageClassifier
"en-core-web-trf @ https://github.com/explosion/spacy-models/releases/download/en_core_web_trf-3.7.3/en_core_web_trf-3.7.3-py3-none-any.whl", # NamedEntityExtractor "openai-whisper>=20231106", # LocalWhisperTranscriber
# NamedEntityExtractor
"spacy>=3.7,<3.8",
"spacy-curated-transformers>=0.2,<=0.3",
"en-core-web-trf @ https://github.com/explosion/spacy-models/releases/download/en_core_web_trf-3.7.3/en_core_web_trf-3.7.3-py3-none-any.whl",
# Converters # Converters
"pypdf", # PyPDFConverter "pypdf", # PyPDFToDocument
"pdfminer.six", # PDFMinerToDocument "pdfminer.six", # PDFMinerToDocument
"markdown-it-py", # MarkdownToDocument "markdown-it-py", # MarkdownToDocument
"mdit_plain", # MarkdownToDocument "mdit_plain", # MarkdownToDocument
"tika", # TikaDocumentConverter "tika", # TikaDocumentConverter
"azure-ai-formrecognizer>=3.2.0b2", # AzureOCRDocumentConverter "azure-ai-formrecognizer>=3.2.0b2", # AzureOCRDocumentConverter
"langdetect", # TextLanguageRouter and DocumentLanguageClassifier "trafilatura", # HTMLToDocument
"sentence-transformers>=2.2.0", # SentenceTransformersTextEmbedder and SentenceTransformersDocumentEmbedder
"openai-whisper>=20231106", # LocalWhisperTranscriber
"trafilatura", # Fulltext extraction from HTML pages
# OpenAPI # OpenAPI
"jsonref", # OpenAPIServiceConnector, OpenAPIServiceToFunctions "jsonref", # OpenAPIServiceConnector, OpenAPIServiceToFunctions