From 2c4c9629f30fa77bb63dcb7a4d005bf5a7c93db2 Mon Sep 17 00:00:00 2001
From: hanhainebula <2512674094@qq.com>
Date: Sat, 19 Oct 2024 21:33:16 +0800
Subject: [PATCH] upload m3 embedder compute_score examples

---
 .../inference/embedder/encoder_only/m3.py     |  5 +-
 .../m3_multi_devices_compute_score.py         | 59 +++++++++++++++++++
 .../embedder/encoder_only/m3_single_device.py |  4 +-
 .../m3_single_device_compute_score.py         | 59 +++++++++++++++++++
 4 files changed, 124 insertions(+), 3 deletions(-)
 create mode 100644 examples/inference/embedder/encoder_only/m3_multi_devices_compute_score.py
 create mode 100644 examples/inference/embedder/encoder_only/m3_single_device_compute_score.py

diff --git a/FlagEmbedding/inference/embedder/encoder_only/m3.py b/FlagEmbedding/inference/embedder/encoder_only/m3.py
index 83997f3..9e24fba 100644
--- a/FlagEmbedding/inference/embedder/encoder_only/m3.py
+++ b/FlagEmbedding/inference/embedder/encoder_only/m3.py
@@ -344,7 +344,10 @@ class M3Embedder(AbsEmbedder):
         max_passage_length: int = 512,
         weights_for_different_modes: List[float] = None,
         **kwargs: Any
-    ) -> Dict[str, List[float]]:
+    ) -> Dict[
+        Literal["colbert", "sparse", "dense", "sparse+dense", "colbert+sparse+dense"],
+        List[float]
+    ]:
         if len(self.target_devices) == 1:
             return self.compute_score_single_device(
                 sentence_pairs,
diff --git a/examples/inference/embedder/encoder_only/m3_multi_devices_compute_score.py b/examples/inference/embedder/encoder_only/m3_multi_devices_compute_score.py
new file mode 100644
index 0000000..7beda7b
--- /dev/null
+++ b/examples/inference/embedder/encoder_only/m3_multi_devices_compute_score.py
@@ -0,0 +1,59 @@
+import os
+from FlagEmbedding import BGEM3FlagModel
+
+
+def test_m3_multi_devices():
+    model = BGEM3FlagModel(
+        'BAAI/bge-m3',
+        normalize_embeddings=True,
+        use_fp16=True,
+        devices=["cuda:0", "cuda:1"],  # if you don't have GPUs, you can use ["cpu", "cpu"]
+        pooling_method='cls',
+        cache_dir=os.getenv('HF_HUB_CACHE', None),
+    )
+
+    queries = [
+        "What is BGE M3?",
+        "Defination of BM25"
+    ] * 100
+    passages = [
+        "BGE M3 is an embedding model supporting dense retrieval, lexical matching and multi-vector interaction.",
+        "BM25 is a bag-of-words retrieval function that ranks a set of documents based on the query terms appearing in each document"
+    ] * 100
+
+    sentence_pairs = list(zip(queries, passages))
+    scores_dict = model.compute_score(
+        sentence_pairs,
+        weights_for_different_modes=[1., 0.3, 1.]
+    )
+
+    queries.reverse()
+    sentence_pairs = list(zip(queries, passages))
+
+    scores_dict_reverse = model.compute_score(
+        sentence_pairs,
+        weights_for_different_modes=[1., 0.3, 1.]
+    )
+
+    scores_dict = {
+        key: value[:2]
+        for key, value in scores_dict.items()
+    }
+    scores_dict_reverse = {
+        key: value[:2]
+        for key, value in scores_dict_reverse.items()
+    }
+
+    print(scores_dict)
+    print(scores_dict_reverse)
+
+
+if __name__ == '__main__':
+    test_m3_multi_devices()
+
+    # print("--------------------------------")
+    # print("Expected Output:")
+    # print("Dense score:")
+    # print(" [[0.626 0.3477]\n [0.3496 0.678 ]]")
+    # print("Sparse score:")
+    # print(" [[0.19554901 0.00880432]\n [0. 0.18036556]]")
diff --git a/examples/inference/embedder/encoder_only/m3_single_device.py b/examples/inference/embedder/encoder_only/m3_single_device.py
index 2bb685d..12c7c84 100644
--- a/examples/inference/embedder/encoder_only/m3_single_device.py
+++ b/examples/inference/embedder/encoder_only/m3_single_device.py
@@ -2,7 +2,7 @@ import os
 from FlagEmbedding import BGEM3FlagModel
 
 
-def test_m3_single_devices():
+def test_m3_single_device():
     model = BGEM3FlagModel(
         'BAAI/bge-m3',
         normalize_embeddings=True,
@@ -45,7 +45,7 @@ def test_m3_single_devices():
 
 
 if __name__ == '__main__':
-    test_m3_single_devices()
+    test_m3_single_device()
 
     print("--------------------------------")
     print("Expected Output:")
diff --git a/examples/inference/embedder/encoder_only/m3_single_device_compute_score.py b/examples/inference/embedder/encoder_only/m3_single_device_compute_score.py
new file mode 100644
index 0000000..ed07cde
--- /dev/null
+++ b/examples/inference/embedder/encoder_only/m3_single_device_compute_score.py
@@ -0,0 +1,59 @@
+import os
+from FlagEmbedding import BGEM3FlagModel
+
+
+def test_m3_single_device():
+    model = BGEM3FlagModel(
+        'BAAI/bge-m3',
+        normalize_embeddings=True,
+        use_fp16=True,
+        devices="cuda:0",  # if you don't have a GPU, you can use "cpu"
+        pooling_method='cls',
+        cache_dir=os.getenv('HF_HUB_CACHE', None),
+    )
+
+    queries = [
+        "What is BGE M3?",
+        "Defination of BM25"
+    ] * 100
+    passages = [
+        "BGE M3 is an embedding model supporting dense retrieval, lexical matching and multi-vector interaction.",
+        "BM25 is a bag-of-words retrieval function that ranks a set of documents based on the query terms appearing in each document"
+    ] * 100
+
+    sentence_pairs = list(zip(queries, passages))
+    scores_dict = model.compute_score(
+        sentence_pairs,
+        weights_for_different_modes=[1., 0.3, 1.]
+    )
+
+    queries.reverse()
+    sentence_pairs = list(zip(queries, passages))
+
+    scores_dict_reverse = model.compute_score(
+        sentence_pairs,
+        weights_for_different_modes=[1., 0.3, 1.]
+    )
+
+    scores_dict = {
+        key: value[:2]
+        for key, value in scores_dict.items()
+    }
+    scores_dict_reverse = {
+        key: value[:2]
+        for key, value in scores_dict_reverse.items()
+    }
+
+    print(scores_dict)
+    print(scores_dict_reverse)
+
+
+if __name__ == '__main__':
+    test_m3_single_device()
+
+    # print("--------------------------------")
+    # print("Expected Output:")
+    # print("Dense score:")
+    # print(" [[0.626 0.3477]\n [0.3496 0.678 ]]")
+    # print("Sparse score:")
+    # print(" [[0.19554901 0.00880432]\n [0. 0.18036556]]")