mirror of
https://github.com/FlagOpen/FlagEmbedding.git
synced 2025-07-04 07:27:35 +00:00
153 lines
3.9 KiB
Plaintext
153 lines
3.9 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Evaluation Using Sentence Transformers"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"In this tutorial, we will go through how to use the Sentence Tranformers library to do evaluation."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 0. Installation"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"%pip install -U sentence-transformers"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sentence_transformers import SentenceTransformer\n",
|
|
"\n",
|
|
"# Load a model\n",
|
|
"model = SentenceTransformer('all-MiniLM-L6-v2')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 1. Retrieval"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Let's choose retrieval as the first task"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import random\n",
|
|
"\n",
|
|
"from sentence_transformers.evaluation import InformationRetrievalEvaluator\n",
|
|
"\n",
|
|
"from datasets import load_dataset"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"BeIR is a well known benchmark for retrieval. Let's use the xxx dataset for our evaluation."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Load the Quora IR dataset (https://huggingface.co/datasets/BeIR/quora, https://huggingface.co/datasets/BeIR/quora-qrels)\n",
|
|
"corpus = load_dataset(\"BeIR/quora\", \"corpus\", split=\"corpus\")\n",
|
|
"queries = load_dataset(\"BeIR/quora\", \"queries\", split=\"queries\")\n",
|
|
"relevant_docs_data = load_dataset(\"BeIR/quora-qrels\", split=\"validation\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Shrink the corpus size heavily to only the relevant documents + 10,000 random documents\n",
|
|
"required_corpus_ids = list(map(str, relevant_docs_data[\"corpus-id\"]))\n",
|
|
"required_corpus_ids += random.sample(corpus[\"_id\"], k=10_000)\n",
|
|
"corpus = corpus.filter(lambda x: x[\"_id\"] in required_corpus_ids)\n",
|
|
"\n",
|
|
"# Convert the datasets to dictionaries\n",
|
|
"corpus = dict(zip(corpus[\"_id\"], corpus[\"text\"])) # Our corpus (cid => document)\n",
|
|
"queries = dict(zip(queries[\"_id\"], queries[\"text\"])) # Our queries (qid => question)\n",
|
|
"relevant_docs = {} # Query ID to relevant documents (qid => set([relevant_cids])\n",
|
|
"for qid, corpus_ids in zip(relevant_docs_data[\"query-id\"], relevant_docs_data[\"corpus-id\"]):\n",
|
|
" qid = str(qid)\n",
|
|
" corpus_ids = str(corpus_ids)\n",
|
|
" if qid not in relevant_docs:\n",
|
|
" relevant_docs[qid] = set()\n",
|
|
" relevant_docs[qid].add(corpus_ids)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Finally we are ready to do the evaluation."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Given queries, a corpus and a mapping with relevant documents, the InformationRetrievalEvaluator computes different IR metrics.\n",
|
|
"ir_evaluator = InformationRetrievalEvaluator(\n",
|
|
" queries=queries,\n",
|
|
" corpus=corpus,\n",
|
|
" relevant_docs=relevant_docs,\n",
|
|
" name=\"BeIR-quora-dev\",\n",
|
|
")\n",
|
|
"\n",
|
|
"results = ir_evaluator(model)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"name": "python",
|
|
"version": "3.12.2"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|