diff --git a/FlagEmbedding/abc/inference/AbsEmbedder.py b/FlagEmbedding/abc/inference/AbsEmbedder.py index 13e74f5..2da111f 100644 --- a/FlagEmbedding/abc/inference/AbsEmbedder.py +++ b/FlagEmbedding/abc/inference/AbsEmbedder.py @@ -167,6 +167,9 @@ class AbsEmbedder(ABC): ): if instruction is None: instruction = self.instruction if instruction_format is None: instruction_format = self.instruction_format + if batch_size is None: batch_size = self.batch_size + if max_length is None: max_length = self.passage_max_length + if convert_to_numpy is None: convert_to_numpy = self.convert_to_numpy if instruction is not None: if isinstance(sentences, str): diff --git a/examples/evaluation/air_bench/eval_air_bench.sh b/examples/evaluation/air_bench/eval_air_bench.sh index 8185c21..bba6bc1 100644 --- a/examples/evaluation/air_bench/eval_air_bench.sh +++ b/examples/evaluation/air_bench/eval_air_bench.sh @@ -8,7 +8,7 @@ eval_args="\ --domains arxiv \ --languages en \ --splits dev test \ - --output_dir /share/jianlv/evaluation/air_bench/search_results \ + --output_dir ./air_bench/search_results \ --search_top_k 1000 --rerank_top_k 100 \ --cache_dir $HF_HUB_CACHE \ --overwrite False \ diff --git a/examples/evaluation/beir/eval_beir.sh b/examples/evaluation/beir/eval_beir.sh index e9415ee..8450dde 100644 --- a/examples/evaluation/beir/eval_beir.sh +++ b/examples/evaluation/beir/eval_beir.sh @@ -2,24 +2,21 @@ if [ -z "$HF_HUB_CACHE" ]; then export HF_HUB_CACHE="$HOME/.cache/huggingface/hub" fi -HF_HUB_CACHE="/share/chaofan/code/FlagEmbedding_update/data/BEIR" -HF_MODEL_CACHE="/share/shared_models" - dataset_names="fiqa arguana cqadupstack" eval_args="\ --eval_name beir \ - --dataset_dir /share/chaofan/code/FlagEmbedding_update/data/beir \ + --dataset_dir ./beir/data \ --dataset_names $dataset_names \ --splits test dev \ - --corpus_embd_save_dir /share/chaofan/code/FlagEmbedding_update/data/beir/corpus_embd \ - --output_dir /share/chaofan/code/FlagEmbedding_update/data/beir/search_results \ + --corpus_embd_save_dir ./beir/corpus_embd \ + --output_dir ./beir/search_results \ --search_top_k 1000 --rerank_top_k 100 \ --cache_path $HF_HUB_CACHE \ --overwrite False \ --k_values 10 100 \ --eval_output_method markdown \ - --eval_output_path /share/chaofan/code/FlagEmbedding_update/data/beir/beir_eval_results.md \ + --eval_output_path ./beir/beir_eval_results.md \ --eval_metrics ndcg_at_10 recall_at_100 \ " diff --git a/examples/evaluation/miracl/eval_miracl.sh b/examples/evaluation/miracl/eval_miracl.sh index dfc39bc..573533b 100644 --- a/examples/evaluation/miracl/eval_miracl.sh +++ b/examples/evaluation/miracl/eval_miracl.sh @@ -6,17 +6,17 @@ dataset_names="bn hi sw te th yo" eval_args="\ --eval_name miracl \ - --dataset_dir /share/jianlv/evaluation/miracl/data \ + --dataset_dir ./miracl/data \ --dataset_names $dataset_names \ --splits dev \ - --corpus_embd_save_dir /share/jianlv/evaluation/miracl/corpus_embd \ - --output_dir /share/jianlv/evaluation/miracl/search_results \ + --corpus_embd_save_dir ./miracl/corpus_embd \ + --output_dir ./miracl/search_results \ --search_top_k 1000 --rerank_top_k 100 \ --cache_path $HF_HUB_CACHE \ --overwrite False \ --k_values 10 100 \ --eval_output_method markdown \ - --eval_output_path /share/jianlv/evaluation/miracl/miracl_eval_results.md \ + --eval_output_path ./miracl/miracl_eval_results.md \ --eval_metrics ndcg_at_10 recall_at_100 \ " diff --git a/examples/evaluation/mkqa/eval_mkqa.sh b/examples/evaluation/mkqa/eval_mkqa.sh index 81c5967..bee8f16 100644 --- a/examples/evaluation/mkqa/eval_mkqa.sh +++ b/examples/evaluation/mkqa/eval_mkqa.sh @@ -6,17 +6,17 @@ dataset_names="en zh_cn" eval_args="\ --eval_name mkqa \ - --dataset_dir /share/jianlv/evaluation/mkqa/data \ + --dataset_dir ./mkqa/data \ --dataset_names $dataset_names \ --splits test \ - --corpus_embd_save_dir /share/jianlv/evaluation/mkqa/corpus_embd \ - --output_dir /share/jianlv/evaluation/mkqa/search_results \ + --corpus_embd_save_dir ./mkqa/corpus_embd \ + --output_dir ./mkqa/search_results \ --search_top_k 1000 --rerank_top_k 100 \ --cache_path $HF_HUB_CACHE \ --overwrite False \ --k_values 20 \ --eval_output_method markdown \ - --eval_output_path /share/jianlv/evaluation/mkqa/mkqa_eval_results.md \ + --eval_output_path ./mkqa/mkqa_eval_results.md \ --eval_metrics qa_recall_at_20 \ " diff --git a/examples/evaluation/mldr/eval_mldr.sh b/examples/evaluation/mldr/eval_mldr.sh index cbf1ebc..0ca4b27 100644 --- a/examples/evaluation/mldr/eval_mldr.sh +++ b/examples/evaluation/mldr/eval_mldr.sh @@ -6,17 +6,17 @@ dataset_names="hi" eval_args="\ --eval_name mldr \ - --dataset_dir /share/jianlv/evaluation/mldr/data \ + --dataset_dir ./mldr/data \ --dataset_names $dataset_names \ --splits test \ - --corpus_embd_save_dir /share/jianlv/evaluation/mldr/corpus_embd \ - --output_dir /share/jianlv/evaluation/mldr/search_results \ + --corpus_embd_save_dir ./mldr/corpus_embd \ + --output_dir ./mldr/search_results \ --search_top_k 1000 --rerank_top_k 100 \ --cache_path $HF_HUB_CACHE \ --overwrite False \ --k_values 10 100 \ --eval_output_method markdown \ - --eval_output_path /share/jianlv/evaluation/mldr/mldr_eval_results.md \ + --eval_output_path ./mldr/mldr_eval_results.md \ --eval_metrics ndcg_at_10 \ " diff --git a/examples/evaluation/msmarco/eval_msmarco.sh b/examples/evaluation/msmarco/eval_msmarco.sh index b7dd1ab..ba87c90 100644 --- a/examples/evaluation/msmarco/eval_msmarco.sh +++ b/examples/evaluation/msmarco/eval_msmarco.sh @@ -2,23 +2,21 @@ if [ -z "$HF_HUB_CACHE" ]; then export HF_HUB_CACHE="$HOME/.cache/huggingface/hub" fi -HF_HUB_CACHE="/share/shared_models" - dataset_names="passage" eval_args="\ --eval_name msmarco \ - --dataset_dir /share/chaofan/code/FlagEmbedding_update/data/msmarco \ + --dataset_dir ./msmarco/data \ --dataset_names $dataset_names \ --splits dev \ - --corpus_embd_save_dir /share/chaofan/code/FlagEmbedding_update/data/msmarco/corpus_embd \ - --output_dir /share/chaofan/code/FlagEmbedding_update/data/msmarco/search_results \ + --corpus_embd_save_dir ./msmarco/corpus_embd \ + --output_dir ./msmarco/search_results \ --search_top_k 1000 --rerank_top_k 100 \ --cache_path $HF_HUB_CACHE \ --overwrite True \ --k_values 10 100 \ --eval_output_method markdown \ - --eval_output_path /share/chaofan/code/FlagEmbedding_update/data/msmarco/msmarco_eval_results.md \ + --eval_output_path ./msmarco/msmarco_eval_results.md \ --eval_metrics ndcg_at_10 recall_at_100 \ " diff --git a/examples/evaluation/mteb/eval_mteb.sh b/examples/evaluation/mteb/eval_mteb.sh index 5268148..fe6c3e9 100644 --- a/examples/evaluation/mteb/eval_mteb.sh +++ b/examples/evaluation/mteb/eval_mteb.sh @@ -2,17 +2,15 @@ if [ -z "$HF_HUB_CACHE" ]; then export HF_HUB_CACHE="$HOME/.cache/huggingface/hub" fi -HF_HUB_CACHE="/share/shared_models" - languages="eng" tasks="NFCorpus BiorxivClusteringS2S SciDocsRR" eval_args="\ --eval_name mteb \ - --output_dir /share/chaofan/code/FlagEmbedding_update/data/mteb/search_results \ + --output_dir ./mteb/search_results \ --languages $languages \ --tasks $tasks \ - --eval_output_path /share/chaofan/code/FlagEmbedding_update/data/mteb/mteb_eval_results.json + --eval_output_path ./mteb/mteb_eval_results.json " model_args="\ diff --git a/examples/finetune/reranker/decoder_only/base.sh b/examples/finetune/reranker/decoder_only/base.sh index 7c1d72f..8aeb1ad 100644 --- a/examples/finetune/reranker/decoder_only/base.sh +++ b/examples/finetune/reranker/decoder_only/base.sh @@ -60,7 +60,6 @@ training_args="\ " cmd="torchrun --nproc_per_node $num_gpus \ - --master_port=4567 \ -m FlagEmbedding.finetune.reranker.decoder_only.base \ $model_args \ $data_args \ diff --git a/examples/finetune/reranker/decoder_only/layerwise.sh b/examples/finetune/reranker/decoder_only/layerwise.sh index 94ebc7f..635bc50 100644 --- a/examples/finetune/reranker/decoder_only/layerwise.sh +++ b/examples/finetune/reranker/decoder_only/layerwise.sh @@ -65,7 +65,6 @@ training_args="\ " cmd="torchrun --nproc_per_node $num_gpus \ - --master_port=4567 \ -m FlagEmbedding.finetune.reranker.decoder_only.layerwise \ $model_args \ $data_args \