diff --git a/FlagEmbedding/inference/embedder/decoder_only/base.py b/FlagEmbedding/inference/embedder/decoder_only/base.py index 7485c52..6376d85 100644 --- a/FlagEmbedding/inference/embedder/decoder_only/base.py +++ b/FlagEmbedding/inference/embedder/decoder_only/base.py @@ -47,8 +47,6 @@ class BaseLLMEmbedder(AbsEmbedder): batch_size (int, optional): Batch size for inference. Defaults to :data:`256`. query_max_length (int, optional): Maximum length for query. Defaults to :data:`512`. passage_max_length (int, optional): Maximum length for passage. Defaults to :data:`512`. - instruction (Optional[str], optional): Instruction for embedding with :attr:`instruction_format`. Defaults to :data:`None`. - instruction_format (str, optional): Instruction format when using :attr:`instruction`. Defaults to :data:`"{}{}"`. convert_to_numpy (bool, optional): If True, the output embedding will be a Numpy array. Otherwise, it will be a Torch Tensor. Defaults to :data:`True`. @@ -72,8 +70,6 @@ class BaseLLMEmbedder(AbsEmbedder): batch_size: int = 256, query_max_length: int = 512, passage_max_length: int = 512, - instruction: Optional[str] = None, - instruction_format: str = "{}{}", convert_to_numpy: bool = True, **kwargs: Any, ): @@ -87,8 +83,6 @@ class BaseLLMEmbedder(AbsEmbedder): batch_size=batch_size, query_max_length=query_max_length, passage_max_length=passage_max_length, - instruction=instruction, - instruction_format=instruction_format, convert_to_numpy=convert_to_numpy, **kwargs ) diff --git a/FlagEmbedding/inference/embedder/decoder_only/icl.py b/FlagEmbedding/inference/embedder/decoder_only/icl.py index 136d7c1..cc46b78 100644 --- a/FlagEmbedding/inference/embedder/decoder_only/icl.py +++ b/FlagEmbedding/inference/embedder/decoder_only/icl.py @@ -54,6 +54,8 @@ class ICLLLMEmbedder(AbsEmbedder): batch_size (int, optional): Batch size for inference. Defaults to :data:`256`. query_max_length (int, optional): Maximum length for query. Defaults to :data:`512`. 
passage_max_length (int, optional): Maximum length for passage. Defaults to :data:`512`. + convert_to_numpy (bool, optional): If True, the output embedding will be a Numpy array. Otherwise, it will be a Torch Tensor. + Defaults to :data:`True`. Attributes: DEFAULT_POOLING_METHOD: The default pooling method when running the model. @@ -77,8 +79,6 @@ class ICLLLMEmbedder(AbsEmbedder): batch_size: int = 256, query_max_length: int = 512, passage_max_length: int = 512, - instruction: Optional[str] = None, - instruction_format: str = "{}{}", convert_to_numpy: bool = True, **kwargs: Any, ): @@ -92,10 +92,8 @@ class ICLLLMEmbedder(AbsEmbedder): batch_size=batch_size, query_max_length=query_max_length, passage_max_length=passage_max_length, - instruction=instruction, - instruction_format=instruction_format, convert_to_numpy=convert_to_numpy, - kwargs=kwargs + **kwargs ) self.tokenizer = AutoTokenizer.from_pretrained( diff --git a/FlagEmbedding/inference/embedder/encoder_only/base.py b/FlagEmbedding/inference/embedder/encoder_only/base.py index f1d0e90..a66223a 100644 --- a/FlagEmbedding/inference/embedder/encoder_only/base.py +++ b/FlagEmbedding/inference/embedder/encoder_only/base.py @@ -28,8 +28,6 @@ class BaseEmbedder(AbsEmbedder): batch_size (int, optional): Batch size for inference. Defaults to :data:`256`. query_max_length (int, optional): Maximum length for query. Defaults to :data:`512`. passage_max_length (int, optional): Maximum length for passage. Defaults to :data:`512`. - instruction (Optional[str], optional): Instruction for embedding with :attr:`instruction_format`. Defaults to :data:`None`. - instruction_format (str, optional): Instruction format when using :attr:`instruction`. Defaults to :data:`"{}{}"`. convert_to_numpy (bool, optional): If True, the output embedding will be a Numpy array. Otherwise, it will be a Torch Tensor. Defaults to :data:`True`. 
@@ -55,8 +53,6 @@ class BaseEmbedder(AbsEmbedder): batch_size: int = 256, query_max_length: int = 512, passage_max_length: int = 512, - instruction: Optional[str] = None, - instruction_format: str = "{}{}", convert_to_numpy: bool = True, **kwargs: Any, ): @@ -70,8 +66,6 @@ class BaseEmbedder(AbsEmbedder): batch_size=batch_size, query_max_length=query_max_length, passage_max_length=passage_max_length, - instruction=instruction, - instruction_format=instruction_format, convert_to_numpy=convert_to_numpy, **kwargs ) @@ -201,9 +195,6 @@ class BaseEmbedder(AbsEmbedder): if device == "cpu": self.use_fp16 = False if self.use_fp16: self.model.half() - if device == "cpu": self.use_fp16 = False - if self.use_fp16: self.model.half() - self.model.to(device) self.model.eval() diff --git a/FlagEmbedding/inference/embedder/encoder_only/m3.py b/FlagEmbedding/inference/embedder/encoder_only/m3.py index 83c75d6..8416b8c 100644 --- a/FlagEmbedding/inference/embedder/encoder_only/m3.py +++ b/FlagEmbedding/inference/embedder/encoder_only/m3.py @@ -38,8 +38,6 @@ class M3Embedder(AbsEmbedder): batch_size (int, optional): Batch size for inference. Defaults to :data:`256`. query_max_length (int, optional): Maximum length for query. Defaults to :data:`512`. passage_max_length (int, optional): Maximum length for passage. Defaults to :data:`512`. - instruction (Optional[str], optional): Instruction for embedding with :attr:`instruction_format`. Defaults to :data:`None`. - instruction_format (str, optional): Instruction format when using :attr:`instruction`. Defaults to :data:`"{}{}"`. return_dense (bool, optional): If true, will return the dense embedding. Defaults to :data:`True`. return_sparse (bool, optional): If true, will return the sparce embedding. Defaults to :data:`False`. return_colbert_vecs (bool, optional): If true, will return the colbert vectors. Defaults to :data:`False`. 
@@ -66,8 +64,6 @@ class M3Embedder(AbsEmbedder): batch_size: int = 256, query_max_length: int = 512, passage_max_length: int = 512, - instruction: Optional[str] = None, - instruction_format: str = "{}{}", return_dense: bool = True, return_sparse: bool = False, return_colbert_vecs: bool = False, @@ -83,8 +79,6 @@ class M3Embedder(AbsEmbedder): batch_size=batch_size, query_max_length=query_max_length, passage_max_length=passage_max_length, - instruction=instruction, - instruction_format=instruction_format, return_dense=return_dense, return_sparse=return_sparse, return_colbert_vecs=return_colbert_vecs, diff --git a/research/BGE_M3/modeling.py b/research/BGE_M3/modeling.py index 51c2655..06d6215 100644 --- a/research/BGE_M3/modeling.py +++ b/research/BGE_M3/modeling.py @@ -221,7 +221,7 @@ class BGEM3Model(nn.Module): if teacher_scores is not None: # print("Use soft-label distillation...") teacher_targets = F.softmax(teacher_scores, dim=-1) # B N - group_size = p_sparse_vecs.size(0) // q_sparse_vecs.size(0) + group_size = p_dense_vecs.size(0) // q_dense_vecs.size(0) # dense loss dense_scores = self.dense_score(q_dense_vecs, p_dense_vecs) # B, B * N diff --git a/research/visual_bge/setup.py b/research/visual_bge/setup.py new file mode 100644 index 0000000..40a80f5 --- /dev/null +++ b/research/visual_bge/setup.py @@ -0,0 +1,18 @@ +from setuptools import setup, find_packages + +setup( + name="visual_bge", + version="0.1.0", + description='visual_bge', + long_description=open("./README.md", encoding="utf-8").read(), + long_description_content_type="text/markdown", + url='https://github.com/FlagOpen/FlagEmbedding/tree/master/research/visual_bge', + packages=find_packages(), + install_requires=[ + 'torchvision', + 'timm', + 'einops', + 'ftfy' + ], + python_requires='>=3.6', +) diff --git a/research/visual_bge/eva_clip/__init__.py b/research/visual_bge/visual_bge/eva_clip/__init__.py similarity index 100% rename from research/visual_bge/eva_clip/__init__.py rename to 
research/visual_bge/visual_bge/eva_clip/__init__.py diff --git a/research/visual_bge/eva_clip/bpe_simple_vocab_16e6.txt.gz b/research/visual_bge/visual_bge/eva_clip/bpe_simple_vocab_16e6.txt.gz similarity index 100% rename from research/visual_bge/eva_clip/bpe_simple_vocab_16e6.txt.gz rename to research/visual_bge/visual_bge/eva_clip/bpe_simple_vocab_16e6.txt.gz diff --git a/research/visual_bge/eva_clip/constants.py b/research/visual_bge/visual_bge/eva_clip/constants.py similarity index 100% rename from research/visual_bge/eva_clip/constants.py rename to research/visual_bge/visual_bge/eva_clip/constants.py diff --git a/research/visual_bge/eva_clip/eva_vit_model.py b/research/visual_bge/visual_bge/eva_clip/eva_vit_model.py similarity index 100% rename from research/visual_bge/eva_clip/eva_vit_model.py rename to research/visual_bge/visual_bge/eva_clip/eva_vit_model.py diff --git a/research/visual_bge/eva_clip/factory.py b/research/visual_bge/visual_bge/eva_clip/factory.py similarity index 100% rename from research/visual_bge/eva_clip/factory.py rename to research/visual_bge/visual_bge/eva_clip/factory.py diff --git a/research/visual_bge/eva_clip/hf_configs.py b/research/visual_bge/visual_bge/eva_clip/hf_configs.py similarity index 100% rename from research/visual_bge/eva_clip/hf_configs.py rename to research/visual_bge/visual_bge/eva_clip/hf_configs.py diff --git a/research/visual_bge/eva_clip/hf_model.py b/research/visual_bge/visual_bge/eva_clip/hf_model.py similarity index 100% rename from research/visual_bge/eva_clip/hf_model.py rename to research/visual_bge/visual_bge/eva_clip/hf_model.py diff --git a/research/visual_bge/eva_clip/loss.py b/research/visual_bge/visual_bge/eva_clip/loss.py similarity index 100% rename from research/visual_bge/eva_clip/loss.py rename to research/visual_bge/visual_bge/eva_clip/loss.py diff --git a/research/visual_bge/eva_clip/model.py b/research/visual_bge/visual_bge/eva_clip/model.py similarity index 100% rename from 
research/visual_bge/eva_clip/model.py rename to research/visual_bge/visual_bge/eva_clip/model.py diff --git a/research/visual_bge/eva_clip/model_configs/EVA01-CLIP-B-16.json b/research/visual_bge/visual_bge/eva_clip/model_configs/EVA01-CLIP-B-16.json similarity index 100% rename from research/visual_bge/eva_clip/model_configs/EVA01-CLIP-B-16.json rename to research/visual_bge/visual_bge/eva_clip/model_configs/EVA01-CLIP-B-16.json diff --git a/research/visual_bge/eva_clip/model_configs/EVA01-CLIP-g-14-plus.json b/research/visual_bge/visual_bge/eva_clip/model_configs/EVA01-CLIP-g-14-plus.json similarity index 100% rename from research/visual_bge/eva_clip/model_configs/EVA01-CLIP-g-14-plus.json rename to research/visual_bge/visual_bge/eva_clip/model_configs/EVA01-CLIP-g-14-plus.json diff --git a/research/visual_bge/eva_clip/model_configs/EVA01-CLIP-g-14.json b/research/visual_bge/visual_bge/eva_clip/model_configs/EVA01-CLIP-g-14.json similarity index 100% rename from research/visual_bge/eva_clip/model_configs/EVA01-CLIP-g-14.json rename to research/visual_bge/visual_bge/eva_clip/model_configs/EVA01-CLIP-g-14.json diff --git a/research/visual_bge/eva_clip/model_configs/EVA02-CLIP-B-16.json b/research/visual_bge/visual_bge/eva_clip/model_configs/EVA02-CLIP-B-16.json similarity index 100% rename from research/visual_bge/eva_clip/model_configs/EVA02-CLIP-B-16.json rename to research/visual_bge/visual_bge/eva_clip/model_configs/EVA02-CLIP-B-16.json diff --git a/research/visual_bge/eva_clip/model_configs/EVA02-CLIP-L-14-336.json b/research/visual_bge/visual_bge/eva_clip/model_configs/EVA02-CLIP-L-14-336.json similarity index 100% rename from research/visual_bge/eva_clip/model_configs/EVA02-CLIP-L-14-336.json rename to research/visual_bge/visual_bge/eva_clip/model_configs/EVA02-CLIP-L-14-336.json diff --git a/research/visual_bge/eva_clip/model_configs/EVA02-CLIP-L-14.json b/research/visual_bge/visual_bge/eva_clip/model_configs/EVA02-CLIP-L-14.json similarity index 100% 
rename from research/visual_bge/eva_clip/model_configs/EVA02-CLIP-L-14.json rename to research/visual_bge/visual_bge/eva_clip/model_configs/EVA02-CLIP-L-14.json diff --git a/research/visual_bge/eva_clip/model_configs/EVA02-CLIP-bigE-14-plus.json b/research/visual_bge/visual_bge/eva_clip/model_configs/EVA02-CLIP-bigE-14-plus.json similarity index 100% rename from research/visual_bge/eva_clip/model_configs/EVA02-CLIP-bigE-14-plus.json rename to research/visual_bge/visual_bge/eva_clip/model_configs/EVA02-CLIP-bigE-14-plus.json diff --git a/research/visual_bge/eva_clip/model_configs/EVA02-CLIP-bigE-14.json b/research/visual_bge/visual_bge/eva_clip/model_configs/EVA02-CLIP-bigE-14.json similarity index 100% rename from research/visual_bge/eva_clip/model_configs/EVA02-CLIP-bigE-14.json rename to research/visual_bge/visual_bge/eva_clip/model_configs/EVA02-CLIP-bigE-14.json diff --git a/research/visual_bge/eva_clip/modified_resnet.py b/research/visual_bge/visual_bge/eva_clip/modified_resnet.py similarity index 99% rename from research/visual_bge/eva_clip/modified_resnet.py rename to research/visual_bge/visual_bge/eva_clip/modified_resnet.py index d0dcc4a..3be17ce 100644 --- a/research/visual_bge/eva_clip/modified_resnet.py +++ b/research/visual_bge/visual_bge/eva_clip/modified_resnet.py @@ -4,7 +4,7 @@ import torch from torch import nn from torch.nn import functional as F -from FlagEmbedding.visual.eva_clip.utils import freeze_batch_norm_2d +from visual_bge.eva_clip.utils import freeze_batch_norm_2d class Bottleneck(nn.Module): diff --git a/research/visual_bge/eva_clip/openai.py b/research/visual_bge/visual_bge/eva_clip/openai.py similarity index 100% rename from research/visual_bge/eva_clip/openai.py rename to research/visual_bge/visual_bge/eva_clip/openai.py diff --git a/research/visual_bge/eva_clip/pretrained.py b/research/visual_bge/visual_bge/eva_clip/pretrained.py similarity index 100% rename from research/visual_bge/eva_clip/pretrained.py rename to 
research/visual_bge/visual_bge/eva_clip/pretrained.py diff --git a/research/visual_bge/eva_clip/rope.py b/research/visual_bge/visual_bge/eva_clip/rope.py similarity index 100% rename from research/visual_bge/eva_clip/rope.py rename to research/visual_bge/visual_bge/eva_clip/rope.py diff --git a/research/visual_bge/eva_clip/timm_model.py b/research/visual_bge/visual_bge/eva_clip/timm_model.py similarity index 100% rename from research/visual_bge/eva_clip/timm_model.py rename to research/visual_bge/visual_bge/eva_clip/timm_model.py diff --git a/research/visual_bge/eva_clip/tokenizer.py b/research/visual_bge/visual_bge/eva_clip/tokenizer.py similarity index 100% rename from research/visual_bge/eva_clip/tokenizer.py rename to research/visual_bge/visual_bge/eva_clip/tokenizer.py diff --git a/research/visual_bge/eva_clip/transform.py b/research/visual_bge/visual_bge/eva_clip/transform.py similarity index 100% rename from research/visual_bge/eva_clip/transform.py rename to research/visual_bge/visual_bge/eva_clip/transform.py diff --git a/research/visual_bge/eva_clip/transformer.py b/research/visual_bge/visual_bge/eva_clip/transformer.py similarity index 100% rename from research/visual_bge/eva_clip/transformer.py rename to research/visual_bge/visual_bge/eva_clip/transformer.py diff --git a/research/visual_bge/eva_clip/utils.py b/research/visual_bge/visual_bge/eva_clip/utils.py similarity index 100% rename from research/visual_bge/eva_clip/utils.py rename to research/visual_bge/visual_bge/eva_clip/utils.py diff --git a/research/visual_bge/modeling.py b/research/visual_bge/visual_bge/modeling.py similarity index 100% rename from research/visual_bge/modeling.py rename to research/visual_bge/visual_bge/modeling.py