FlagEmbedding/examples/inference/reranker/decoder_only/lightweight_multi_devices.py
2024-10-23 23:35:26 +08:00

34 lines
1.2 KiB
Python

import os
from FlagEmbedding import LightWeightFlagLLMReranker
def test_base_multi_devices():
    """Run the lightweight LLM reranker across two devices and print scores.

    Builds a ``LightWeightFlagLLMReranker`` for
    ``BAAI/bge-reranker-v2.5-gemma2-lightweight``, scores 400 query/passage
    pairs (four distinct pairs repeated 100 times) and prints the first four
    scores.
    """
    # Optional cache location for downloaded weights; None when the
    # environment variable is not set.
    hub_cache = os.getenv('HF_HUB_CACHE')

    reranker = LightWeightFlagLLMReranker(
        'BAAI/bge-reranker-v2.5-gemma2-lightweight',
        use_fp16=True,
        query_instruction_for_rerank="A: ",
        passage_instruction_for_rerank="B: ",
        trust_remote_code=True,
        # if you don't have GPUs, you can use ["cpu", "cpu"]
        devices=["cuda:3", "cuda:4"],
        cache_dir=hub_cache,
    )

    queries = (
        "What is the capital of France?",
        "What is the population of China?",
    )
    passages = (
        "Paris is the capital of France.",
        "The population of China is over 1.4 billion people.",
    )
    # Cross product in query-major order, then repeated so there is enough
    # work to spread over both devices.
    base_pairs = [[query, passage] for query in queries for passage in passages]
    eval_pairs = base_pairs * 100

    results = reranker.compute_score(
        eval_pairs,
        cutoff_layers=[28],
        compress_ratio=2,
        compress_layers=[24, 40],
    )
    print(results[:4])
# Script entry point: run the example, then print the reference scores so the
# actual output can be compared by eye.
if __name__ == "__main__":
    test_base_multi_devices()

    trailer_lines = (
        "--------------------------------",
        "Expected Output:",
        "[25.375, 8.734375, 9.8359375, 26.15625]",
    )
    for trailer in trailer_lines:
        print(trailer)