docs: TransformersImageToText - inform about supported models, better exception handling (#4310)

* better docs, exception handling and tests
* Update lg
* fix little error

Co-authored-by: agnieszka-m <amarzec13@gmail.com>
This commit is contained in: parent 39a20c37fd, commit 444a3116c4
@@ -19,12 +19,21 @@ logger = logging.getLogger(__name__)
 
 # see https://github.com/huggingface/transformers/issues/21110
 SUPPORTED_MODELS_CLASSES = ["VisionEncoderDecoderModel"]
 
+UNSUPPORTED_MODEL_MESSAGE = (
+    f"The supported classes are: {SUPPORTED_MODELS_CLASSES}. \n"
+    f"To find the supported models: \n"
+    f"1. Visit [image-to-text models on Hugging Face](https://huggingface.co/models?pipeline_tag=image-to-text). \n"
+    f"2. Open a model you want to check. \n"
+    f'3. On the model page, go to the "Files and Versions" tab. \n'
+    f"4. Open the `config.json` file, and make sure the `architectures` field contains one of the supported classes: {SUPPORTED_MODELS_CLASSES}."
+)
+
 
 class TransformersImageToText(BaseImageToText):
     """
     A transformer-based model to generate captions for images using Hugging Face's transformers framework.
 
-    For an up-to-date list of available models, see [Hugging Face image to text models](https://huggingface.co/models?pipeline_tag=image-to-text)`__
+    Currently, this node supports `VisionEncoderDecoderModel` models.
 
     **Example**
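The four steps in `UNSUPPORTED_MODEL_MESSAGE` can also be checked programmatically. A minimal sketch, assuming `transformers` is installed and reading the same `architectures` field of `config.json` via `AutoConfig`; the helper name and the `nlpconnect/vit-gpt2-image-captioning` checkpoint (assumed to be a `VisionEncoderDecoderModel`) are illustrative:

```python
from transformers import AutoConfig

SUPPORTED_MODELS_CLASSES = ["VisionEncoderDecoderModel"]


def is_supported_image_to_text_model(model_name_or_path: str) -> bool:
    # AutoConfig fetches the model's config.json from the Hugging Face Hub,
    # mirroring step 4 of the message without downloading the weights.
    config = AutoConfig.from_pretrained(model_name_or_path)
    architectures = getattr(config, "architectures", None) or []
    return any(arch in SUPPORTED_MODELS_CLASSES for arch in architectures)


print(is_supported_image_to_text_model("nlpconnect/vit-gpt2-image-captioning"))  # True
print(is_supported_image_to_text_model("deepset/minilm-uncased-squad2"))  # False
```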
@@ -65,11 +74,15 @@ class TransformersImageToText(BaseImageToText):
         devices: Optional[List[Union[str, torch.device]]] = None,
     ):
         """
-        Load an image-to-text model from transformers.
-        For an up-to-date list of available models, see [Hugging Face image-to-text models](https://huggingface.co/models?pipeline_tag=image-to-text).
+        Load a `VisionEncoderDecoderModel` model from transformers.
 
         :param model_name_or_path: Directory of a saved model or the name of a public model.
-                                   For a full list of models, see [Hugging Face image-to-text models](https://huggingface.co/models?pipeline_tag=image-to-text).
+                                   Currently, only `VisionEncoderDecoderModel` models are supported.
+                                   To find these models:
+                                   1. Visit [Hugging Face image-to-text models](https://huggingface.co/models?pipeline_tag=image-to-text).
+                                   2. Open the model you want to check.
+                                   3. On the model page, go to the "Files and Versions" tab.
+                                   4. Open the `config.json` file and make sure the `architectures` field contains `VisionEncoderDecoderModel`.
         :param model_version: The version of the model to use from the Hugging Face model hub. This can be the tag name, branch name, or commit hash.
         :param generation_kwargs: Dictionary containing arguments for the `generate()` method of the Hugging Face model.
                                   See [generate()](https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.GenerationMixin.generate) in Hugging Face documentation.
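For context, a minimal usage sketch of the constructor documented above. The import path and the checkpoint are assumptions: `nlpconnect/vit-gpt2-image-captioning` is taken to be a `VisionEncoderDecoderModel`, and `max_new_tokens` a valid `generate()` argument:

```python
from haystack.nodes import TransformersImageToText

image_to_text = TransformersImageToText(
    model_name_or_path="nlpconnect/vit-gpt2-image-captioning",  # assumed supported checkpoint
    generation_kwargs={"max_new_tokens": 50},  # forwarded to the model's generate()
)
```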
@@ -95,6 +108,7 @@ class TransformersImageToText(BaseImageToText):
             self.devices[0],
         )
 
+        try:
             self.model = pipeline(
                 task="image-to-text",
                 model=model_name_or_path,
@@ -102,13 +116,17 @@ class TransformersImageToText(BaseImageToText):
                 device=self.devices[0],
                 use_auth_token=use_auth_token,
             )
+        except KeyError as err:
+            raise ValueError(
+                f"The model '{model_name_or_path}' is not supported for ImageToText. " f"{UNSUPPORTED_MODEL_MESSAGE}"
+            ) from err
+
+        # for some unsupported models, initializing the HF pipeline doesn't raise errors but does not work
         model_class_name = self.model.model.__class__.__name__
         if model_class_name not in SUPPORTED_MODELS_CLASSES:
             raise ValueError(
-                f"The model of class '{model_class_name}' is not supported for ImageToText."
-                f"The supported classes are: {SUPPORTED_MODELS_CLASSES}."
-                f"You can find the availaible models here: https://huggingface.co/models?pipeline_tag=image-to-text."
+                f"The model '{model_name_or_path}' (class '{model_class_name}') is not supported for ImageToText. "
+                f"{UNSUPPORTED_MODEL_MESSAGE}"
             )
 
         self.generation_kwargs = generation_kwargs
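The two checks cover different failure modes: `pipeline()` can fail with a bare `KeyError` for some unsupported models (handled by the `except` branch), while others load without error and are only caught by inspecting the class of the wrapped model. A standalone sketch of that inspection, again assuming `nlpconnect/vit-gpt2-image-captioning` is an available `VisionEncoderDecoderModel` checkpoint:

```python
from transformers import pipeline

# A transformers pipeline exposes the wrapped model as `.model`; the node above
# stores the pipeline itself in `self.model`, hence `self.model.model` in its check.
pipe = pipeline(task="image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
print(pipe.model.__class__.__name__)  # expected: VisionEncoderDecoderModel
```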
@@ -82,8 +82,17 @@ def test_image_to_text_not_image_document(image_to_text):
 
 
 @pytest.mark.integration
-def test_image_to_text_unsupported_model():
+def test_image_to_text_unsupported_model_after_loading():
     with pytest.raises(
-        ValueError, match="The model of class 'BertForQuestionAnswering' is not supported for ImageToText"
+        ValueError,
+        match="The model 'deepset/minilm-uncased-squad2' \(class 'BertForQuestionAnswering'\) is not supported for ImageToText",
     ):
         _ = TransformersImageToText(model_name_or_path="deepset/minilm-uncased-squad2")
+
+
+@pytest.mark.integration
+def test_image_to_text_unsupported_model_before_loading():
+    with pytest.raises(
+        ValueError, match="The model 'Salesforce/blip-image-captioning-base' is not supported for ImageToText"
+    ):
+        _ = TransformersImageToText(model_name_or_path="Salesforce/blip-image-captioning-base")
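Note that `pytest.raises(match=...)` interprets the string as a regular expression, which is why the parentheses in the first test's expected message are backslash-escaped. An equivalent formulation using `re.escape` (same model and message as the test above; the import path for the node is assumed):

```python
import re

import pytest

from haystack.nodes import TransformersImageToText

expected = (
    "The model 'deepset/minilm-uncased-squad2' (class 'BertForQuestionAnswering') "
    "is not supported for ImageToText"
)
# re.escape neutralizes regex metacharacters, so the literal message matches as-is;
# pytest uses re.search, so matching a prefix of the full error message is enough
with pytest.raises(ValueError, match=re.escape(expected)):
    _ = TransformersImageToText(model_name_or_path="deepset/minilm-uncased-squad2")
```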