Merge pull request #1189 from hanhainebula/master

delete useless parameters for embedder classes
This commit is contained in:
chaofan 2024-11-04 16:08:06 +08:00 committed by GitHub
commit 17729a65c4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 1 addition and 28 deletions

View File

@ -47,8 +47,6 @@ class BaseLLMEmbedder(AbsEmbedder):
batch_size (int, optional): Batch size for inference. Defaults to :data:`256`.
query_max_length (int, optional): Maximum length for query. Defaults to :data:`512`.
passage_max_length (int, optional): Maximum length for passage. Defaults to :data:`512`.
instruction (Optional[str], optional): Instruction for embedding with :attr:`instruction_format`. Defaults to :data:`None`.
instruction_format (str, optional): Instruction format when using :attr:`instruction`. Defaults to :data:`"{}{}"`.
convert_to_numpy (bool, optional): If True, the output embedding will be a Numpy array. Otherwise, it will be a Torch Tensor.
Defaults to :data:`True`.
@ -72,8 +70,6 @@ class BaseLLMEmbedder(AbsEmbedder):
batch_size: int = 256,
query_max_length: int = 512,
passage_max_length: int = 512,
instruction: Optional[str] = None,
instruction_format: str = "{}{}",
convert_to_numpy: bool = True,
**kwargs: Any,
):
@ -87,8 +83,6 @@ class BaseLLMEmbedder(AbsEmbedder):
batch_size=batch_size,
query_max_length=query_max_length,
passage_max_length=passage_max_length,
instruction=instruction,
instruction_format=instruction_format,
convert_to_numpy=convert_to_numpy,
**kwargs
)

View File

@ -54,8 +54,6 @@ class ICLLLMEmbedder(AbsEmbedder):
batch_size (int, optional): Batch size for inference. Defaults to :data:`256`.
query_max_length (int, optional): Maximum length for query. Defaults to :data:`512`.
passage_max_length (int, optional): Maximum length for passage. Defaults to :data:`512`.
instruction (Optional[str], optional): Instruction for embedding with :attr:`instruction_format`. Defaults to :data:`None`.
instruction_format (str, optional): Instruction format when using :attr:`instruction`. Defaults to :data:`"{}{}"`.
convert_to_numpy (bool, optional): If True, the output embedding will be a Numpy array. Otherwise, it will be a Torch Tensor.
Defaults to :data:`True`.
@ -81,8 +79,6 @@ class ICLLLMEmbedder(AbsEmbedder):
batch_size: int = 256,
query_max_length: int = 512,
passage_max_length: int = 512,
instruction: Optional[str] = None,
instruction_format: str = "{}{}",
convert_to_numpy: bool = True,
**kwargs: Any,
):
@ -96,10 +92,8 @@ class ICLLLMEmbedder(AbsEmbedder):
batch_size=batch_size,
query_max_length=query_max_length,
passage_max_length=passage_max_length,
instruction=instruction,
instruction_format=instruction_format,
convert_to_numpy=convert_to_numpy,
kwargs=kwargs
**kwargs
)
self.tokenizer = AutoTokenizer.from_pretrained(

View File

@ -28,8 +28,6 @@ class BaseEmbedder(AbsEmbedder):
batch_size (int, optional): Batch size for inference. Defaults to :data:`256`.
query_max_length (int, optional): Maximum length for query. Defaults to :data:`512`.
passage_max_length (int, optional): Maximum length for passage. Defaults to :data:`512`.
instruction (Optional[str], optional): Instruction for embedding with :attr:`instruction_format`. Defaults to :data:`None`.
instruction_format (str, optional): Instruction format when using :attr:`instruction`. Defaults to :data:`"{}{}"`.
convert_to_numpy (bool, optional): If True, the output embedding will be a Numpy array. Otherwise, it will be a Torch Tensor.
Defaults to :data:`True`.
@ -55,8 +53,6 @@ class BaseEmbedder(AbsEmbedder):
batch_size: int = 256,
query_max_length: int = 512,
passage_max_length: int = 512,
instruction: Optional[str] = None,
instruction_format: str = "{}{}",
convert_to_numpy: bool = True,
**kwargs: Any,
):
@ -70,8 +66,6 @@ class BaseEmbedder(AbsEmbedder):
batch_size=batch_size,
query_max_length=query_max_length,
passage_max_length=passage_max_length,
instruction=instruction,
instruction_format=instruction_format,
convert_to_numpy=convert_to_numpy,
**kwargs
)
@ -201,9 +195,6 @@ class BaseEmbedder(AbsEmbedder):
if device == "cpu": self.use_fp16 = False
if self.use_fp16: self.model.half()
if device == "cpu": self.use_fp16 = False
if self.use_fp16: self.model.half()
self.model.to(device)
self.model.eval()

View File

@ -38,8 +38,6 @@ class M3Embedder(AbsEmbedder):
batch_size (int, optional): Batch size for inference. Defaults to :data:`256`.
query_max_length (int, optional): Maximum length for query. Defaults to :data:`512`.
passage_max_length (int, optional): Maximum length for passage. Defaults to :data:`512`.
instruction (Optional[str], optional): Instruction for embedding with :attr:`instruction_format`. Defaults to :data:`None`.
instruction_format (str, optional): Instruction format when using :attr:`instruction`. Defaults to :data:`"{}{}"`.
return_dense (bool, optional): If true, will return the dense embedding. Defaults to :data:`True`.
return_sparse (bool, optional): If true, will return the sparce embedding. Defaults to :data:`False`.
return_colbert_vecs (bool, optional): If true, will return the colbert vectors. Defaults to :data:`False`.
@ -66,8 +64,6 @@ class M3Embedder(AbsEmbedder):
batch_size: int = 256,
query_max_length: int = 512,
passage_max_length: int = 512,
instruction: Optional[str] = None,
instruction_format: str = "{}{}",
return_dense: bool = True,
return_sparse: bool = False,
return_colbert_vecs: bool = False,
@ -83,8 +79,6 @@ class M3Embedder(AbsEmbedder):
batch_size=batch_size,
query_max_length=query_max_length,
passage_max_length=passage_max_length,
instruction=instruction,
instruction_format=instruction_format,
return_dense=return_dense,
return_sparse=return_sparse,
return_colbert_vecs=return_colbert_vecs,