From d412884a4e96c45e4a9e83a43e507e9beaa04713 Mon Sep 17 00:00:00 2001 From: cfli <545999961@qq.com> Date: Thu, 22 May 2025 18:06:43 +0800 Subject: [PATCH] update reinforced_ir --- research/Reinforced_IR/README.md | 2 +- .../finetune/retriever/arguments.py | 2 -- .../finetune/retriever/modeling.py | 2 -- .../finetune/retriever/runner.py | 20 ------------------- 4 files changed, 1 insertion(+), 25 deletions(-) diff --git a/research/Reinforced_IR/README.md b/research/Reinforced_IR/README.md index d070985..ac7e65d 100644 --- a/research/Reinforced_IR/README.md +++ b/research/Reinforced_IR/README.md @@ -114,7 +114,7 @@ For all data, you can save with the following format: ├─data | ├─msmarco | ├─corpus.json -| ├─msmarco +| ├─trec-covid | ├─corpus.json | ├─nq | ├─corpus.json diff --git a/research/Reinforced_IR/finetune/retriever/arguments.py b/research/Reinforced_IR/finetune/retriever/arguments.py index 20c610c..3719aa3 100644 --- a/research/Reinforced_IR/finetune/retriever/arguments.py +++ b/research/Reinforced_IR/finetune/retriever/arguments.py @@ -12,8 +12,6 @@ class IREmbedderTrainingArguments(AbsEmbedderTrainingArguments): """ Training argument class for M3. """ - use_linear_for_answer: bool = field(default=False, metadata={"help": "use linear fuse for answer"}) - linear_path: str = field(default=None, metadata={"help": "The linear weight path"}) training_type: str = field(default='retrieval_answer', metadata={"help": "whether to use answer"}) answer_temperature: float = field(default=None, metadata={"help": "temperature for answer"}) normalize_answer: bool = field(default=True, metadata={"help": "normalize answer"}) diff --git a/research/Reinforced_IR/finetune/retriever/modeling.py b/research/Reinforced_IR/finetune/retriever/modeling.py index c57c40d..5878127 100644 --- a/research/Reinforced_IR/finetune/retriever/modeling.py +++ b/research/Reinforced_IR/finetune/retriever/modeling.py @@ -49,8 +49,6 @@ class BiIREmbedderModel(BiEncoderOnlyEmbedderModel): sentence_pooling_method: str = 'cls', normalize_embeddings: bool = False, normalize_answer: bool = True, - use_linear_for_answer: bool = False, - answer_model: AutoModel = None, training_type: str = 'retrieval_answer' ): super().__init__( diff --git a/research/Reinforced_IR/finetune/retriever/runner.py b/research/Reinforced_IR/finetune/retriever/runner.py index fce7b2b..e0f3428 100644 --- a/research/Reinforced_IR/finetune/retriever/runner.py +++ b/research/Reinforced_IR/finetune/retriever/runner.py @@ -77,24 +77,6 @@ class IREmbedderRunner(AbsEmbedderRunner): trust_remote_code=self.model_args.trust_remote_code ) - if self.training_args.use_linear_for_answer: - if self.training_args.linear_path is not None: - answer_model = AutoModel.from_pretrained( - self.training_args.linear_path, - cache_dir=self.model_args.cache_dir, - token=self.model_args.token, - trust_remote_code=self.model_args.trust_remote_code - ) - else: - answer_model = AutoModel.from_pretrained( - self.model_args.model_name_or_path, - cache_dir=self.model_args.cache_dir, - token=self.model_args.token, - trust_remote_code=self.model_args.trust_remote_code - ) - else: - answer_model = None - num_labels = 1 config = AutoConfig.from_pretrained( self.model_args.config_name if self.model_args.config_name else self.model_args.model_name_or_path, @@ -115,8 +97,6 @@ class IREmbedderRunner(AbsEmbedderRunner): kd_loss_type=self.training_args.kd_loss_type, sentence_pooling_method=self.training_args.sentence_pooling_method, normalize_embeddings=self.training_args.normalize_embeddings, - use_linear_for_answer=self.training_args.use_linear_for_answer, - answer_model=answer_model, normalize_answer=self.training_args.normalize_answer, training_type=self.training_args.training_type )