mirror of
https://github.com/FlagOpen/FlagEmbedding.git
synced 2025-06-27 02:39:58 +00:00
update readme
This commit is contained in:
parent
34e9c21654
commit
6c71f3fc71
@ -167,6 +167,9 @@ class AbsEmbedder(ABC):
|
||||
):
|
||||
if instruction is None: instruction = self.instruction
|
||||
if instruction_format is None: instruction_format = self.instruction_format
|
||||
if batch_size is None: batch_size = self.batch_size
|
||||
if max_length is None: max_length = self.passage_max_length
|
||||
if convert_to_numpy is None: convert_to_numpy = self.convert_to_numpy
|
||||
|
||||
if instruction is not None:
|
||||
if isinstance(sentences, str):
|
||||
|
@ -8,7 +8,7 @@ eval_args="\
|
||||
--domains arxiv \
|
||||
--languages en \
|
||||
--splits dev test \
|
||||
--output_dir /share/jianlv/evaluation/air_bench/search_results \
|
||||
--output_dir ./air_bench/search_results \
|
||||
--search_top_k 1000 --rerank_top_k 100 \
|
||||
--cache_dir $HF_HUB_CACHE \
|
||||
--overwrite False \
|
||||
|
@ -2,24 +2,21 @@ if [ -z "$HF_HUB_CACHE" ]; then
|
||||
export HF_HUB_CACHE="$HOME/.cache/huggingface/hub"
|
||||
fi
|
||||
|
||||
HF_HUB_CACHE="/share/chaofan/code/FlagEmbedding_update/data/BEIR"
|
||||
HF_MODEL_CACHE="/share/shared_models"
|
||||
|
||||
dataset_names="fiqa arguana cqadupstack"
|
||||
|
||||
eval_args="\
|
||||
--eval_name beir \
|
||||
--dataset_dir /share/chaofan/code/FlagEmbedding_update/data/beir \
|
||||
--dataset_dir ./beir/data \
|
||||
--dataset_names $dataset_names \
|
||||
--splits test dev \
|
||||
--corpus_embd_save_dir /share/chaofan/code/FlagEmbedding_update/data/beir/corpus_embd \
|
||||
--output_dir /share/chaofan/code/FlagEmbedding_update/data/beir/search_results \
|
||||
--corpus_embd_save_dir ./beir/corpus_embd \
|
||||
--output_dir ./beir/search_results \
|
||||
--search_top_k 1000 --rerank_top_k 100 \
|
||||
--cache_path $HF_HUB_CACHE \
|
||||
--overwrite False \
|
||||
--k_values 10 100 \
|
||||
--eval_output_method markdown \
|
||||
--eval_output_path /share/chaofan/code/FlagEmbedding_update/data/beir/beir_eval_results.md \
|
||||
--eval_output_path ./beir/beir_eval_results.md \
|
||||
--eval_metrics ndcg_at_10 recall_at_100 \
|
||||
"
|
||||
|
||||
|
@ -6,17 +6,17 @@ dataset_names="bn hi sw te th yo"
|
||||
|
||||
eval_args="\
|
||||
--eval_name miracl \
|
||||
--dataset_dir /share/jianlv/evaluation/miracl/data \
|
||||
--dataset_dir ./miracl/data \
|
||||
--dataset_names $dataset_names \
|
||||
--splits dev \
|
||||
--corpus_embd_save_dir /share/jianlv/evaluation/miracl/corpus_embd \
|
||||
--output_dir /share/jianlv/evaluation/miracl/search_results \
|
||||
--corpus_embd_save_dir ./miracl/corpus_embd \
|
||||
--output_dir ./miracl/search_results \
|
||||
--search_top_k 1000 --rerank_top_k 100 \
|
||||
--cache_path $HF_HUB_CACHE \
|
||||
--overwrite False \
|
||||
--k_values 10 100 \
|
||||
--eval_output_method markdown \
|
||||
--eval_output_path /share/jianlv/evaluation/miracl/miracl_eval_results.md \
|
||||
--eval_output_path ./miracl/miracl_eval_results.md \
|
||||
--eval_metrics ndcg_at_10 recall_at_100 \
|
||||
"
|
||||
|
||||
|
@ -6,17 +6,17 @@ dataset_names="en zh_cn"
|
||||
|
||||
eval_args="\
|
||||
--eval_name mkqa \
|
||||
--dataset_dir /share/jianlv/evaluation/mkqa/data \
|
||||
--dataset_dir ./mkqa/data \
|
||||
--dataset_names $dataset_names \
|
||||
--splits test \
|
||||
--corpus_embd_save_dir /share/jianlv/evaluation/mkqa/corpus_embd \
|
||||
--output_dir /share/jianlv/evaluation/mkqa/search_results \
|
||||
--corpus_embd_save_dir ./mkqa/corpus_embd \
|
||||
--output_dir ./mkqa/search_results \
|
||||
--search_top_k 1000 --rerank_top_k 100 \
|
||||
--cache_path $HF_HUB_CACHE \
|
||||
--overwrite False \
|
||||
--k_values 20 \
|
||||
--eval_output_method markdown \
|
||||
--eval_output_path /share/jianlv/evaluation/mkqa/mkqa_eval_results.md \
|
||||
--eval_output_path ./mkqa/mkqa_eval_results.md \
|
||||
--eval_metrics qa_recall_at_20 \
|
||||
"
|
||||
|
||||
|
@ -6,17 +6,17 @@ dataset_names="hi"
|
||||
|
||||
eval_args="\
|
||||
--eval_name mldr \
|
||||
--dataset_dir /share/jianlv/evaluation/mldr/data \
|
||||
--dataset_dir ./mldr/data \
|
||||
--dataset_names $dataset_names \
|
||||
--splits test \
|
||||
--corpus_embd_save_dir /share/jianlv/evaluation/mldr/corpus_embd \
|
||||
--output_dir /share/jianlv/evaluation/mldr/search_results \
|
||||
--corpus_embd_save_dir ./mldr/corpus_embd \
|
||||
--output_dir ./mldr/search_results \
|
||||
--search_top_k 1000 --rerank_top_k 100 \
|
||||
--cache_path $HF_HUB_CACHE \
|
||||
--overwrite False \
|
||||
--k_values 10 100 \
|
||||
--eval_output_method markdown \
|
||||
--eval_output_path /share/jianlv/evaluation/mldr/mldr_eval_results.md \
|
||||
--eval_output_path ./mldr/mldr_eval_results.md \
|
||||
--eval_metrics ndcg_at_10 \
|
||||
"
|
||||
|
||||
|
@ -2,23 +2,21 @@ if [ -z "$HF_HUB_CACHE" ]; then
|
||||
export HF_HUB_CACHE="$HOME/.cache/huggingface/hub"
|
||||
fi
|
||||
|
||||
HF_HUB_CACHE="/share/shared_models"
|
||||
|
||||
dataset_names="passage"
|
||||
|
||||
eval_args="\
|
||||
--eval_name msmarco \
|
||||
--dataset_dir /share/chaofan/code/FlagEmbedding_update/data/msmarco \
|
||||
--dataset_dir ./msmarco/data \
|
||||
--dataset_names $dataset_names \
|
||||
--splits dev \
|
||||
--corpus_embd_save_dir /share/chaofan/code/FlagEmbedding_update/data/msmarco/corpus_embd \
|
||||
--output_dir /share/chaofan/code/FlagEmbedding_update/data/msmarco/search_results \
|
||||
--corpus_embd_save_dir ./msmarco/corpus_embd \
|
||||
--output_dir ./msmarco/search_results \
|
||||
--search_top_k 1000 --rerank_top_k 100 \
|
||||
--cache_path $HF_HUB_CACHE \
|
||||
--overwrite True \
|
||||
--k_values 10 100 \
|
||||
--eval_output_method markdown \
|
||||
--eval_output_path /share/chaofan/code/FlagEmbedding_update/data/msmarco/msmarco_eval_results.md \
|
||||
--eval_output_path ./msmarco/msmarco_eval_results.md \
|
||||
--eval_metrics ndcg_at_10 recall_at_100 \
|
||||
"
|
||||
|
||||
|
@ -2,17 +2,15 @@ if [ -z "$HF_HUB_CACHE" ]; then
|
||||
export HF_HUB_CACHE="$HOME/.cache/huggingface/hub"
|
||||
fi
|
||||
|
||||
HF_HUB_CACHE="/share/shared_models"
|
||||
|
||||
languages="eng"
|
||||
tasks="NFCorpus BiorxivClusteringS2S SciDocsRR"
|
||||
|
||||
eval_args="\
|
||||
--eval_name mteb \
|
||||
--output_dir /share/chaofan/code/FlagEmbedding_update/data/mteb/search_results \
|
||||
--output_dir ./mteb/search_results \
|
||||
--languages $languages \
|
||||
--tasks $tasks \
|
||||
--eval_output_path /share/chaofan/code/FlagEmbedding_update/data/mteb/mteb_eval_results.json
|
||||
--eval_output_path ./mteb/mteb_eval_results.json
|
||||
"
|
||||
|
||||
model_args="\
|
||||
|
@ -60,7 +60,6 @@ training_args="\
|
||||
"
|
||||
|
||||
cmd="torchrun --nproc_per_node $num_gpus \
|
||||
--master_port=4567 \
|
||||
-m FlagEmbedding.finetune.reranker.decoder_only.base \
|
||||
$model_args \
|
||||
$data_args \
|
||||
|
@ -65,7 +65,6 @@ training_args="\
|
||||
"
|
||||
|
||||
cmd="torchrun --nproc_per_node $num_gpus \
|
||||
--master_port=4567 \
|
||||
-m FlagEmbedding.finetune.reranker.decoder_only.layerwise \
|
||||
$model_args \
|
||||
$data_args \
|
||||
|
Loading…
x
Reference in New Issue
Block a user