Mirror of https://github.com/deepset-ai/haystack.git (synced 2026-01-06 03:57:19 +00:00)
Automate benchmarks via CML (#518)
* initial test cml
* Update cml.yaml
* WIP test workflow
* switch to general ubuntu ami
* switch to general ubuntu ami
* disable gpu for tests
* rm gpu infos
* rm gpu infos
* update token env
* switch github token
* add postgres
* test db connection
* fix typo
* remove tty
* add sleep for db
* debug runner
* debug removal postgres
* debug: reset to working commit
* debug: change github token
* switch to new bot token
* debug token
* add back postgres
* adjust network runner docker
* add elastic
* fix typo
* adjust working dir
* fix benchmark execution
* enable s3 downloads
* add query benchmark. fix path
* add saving of markdown files
* cat md files. add faiss+dpr. increase n_queries
* switch to GPU instance
* switch availability zone
* switch to public aws DL ami
* increase volume size
* rm faiss. fix error logging
* save markdown files
* add reader benchmarks
* add download of squad data
* correct reader metric normalization
* fix newlines between reports
* fix max_docs for reader eval data. remove max_docs from ci run config
* fix mypy. switch workflow trigger
* try trigger for label
* try trigger for label
* change trigger syntax
* debug machine shutdown with test workflow
* add es and postgres to test workflow
* Revert "add es and postgres to test workflow"
  This reverts commit 6f038d3d7f12eea924b54529e61b192858eaa9d5.
* Revert "debug machine shutdown with test workflow"
  This reverts commit db70eabae8850b88e1d61fd79b04d4f49d54990a.
* fix typo in action. set benchmark config back to original
This commit is contained in:
  parent 3f81c93f36
  commit 0acafc403a
.github/workflows/cml.yaml (new file, vendored, 90 additions)
@@ -0,0 +1,90 @@
+name: benchmarks
+
+on:
+  workflow_dispatch:
+  pull_request:
+    types: [labeled]
+jobs:
+  deploy-cloud-runner:
+    if: ${{ (github.event.action == 'labeled' && github.event.label.name == 'benchmark') || github.event.action == 'workflow_dispatch' }}
+    runs-on: [ubuntu-latest]
+    container: docker://dvcorg/cml
+    steps:
+      - name: deploy
+        env:
+          repo_token: ${{ secrets.HAYSTACK_BOT_TOKEN }}
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_CI_ACCESS_KEY }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_CI_SECRET_ACCESS_KEY }}
+          VPC: ${{ secrets.AWS_CI_VPC }}
+        run: |
+          echo "Deploying..."
+          RUNNER_LABELS="cml,aws"
+          RUNNER_REPO="https://github.com/${GITHUB_REPOSITORY}"
+          MACHINE="cml$(date +%s)"
+          docker-machine create \
+            --driver amazonec2 \
+            --amazonec2-instance-type p3.2xlarge \
+            --amazonec2-vpc-id $VPC \
+            --amazonec2-region us-east-1 \
+            --amazonec2-zone c \
+            --amazonec2-ssh-user ubuntu \
+            --amazonec2-ami ami-06a25ee8966373068 \
+            --amazonec2-root-size 150 \
+            $MACHINE
+          eval "$(docker-machine env --shell sh $MACHINE)"
+          (
+          docker-machine ssh $MACHINE "sudo mkdir -p \
+          /docker_machine && \
+          sudo chmod 777 /docker_machine" && \
+          docker-machine scp -r -q ~/.docker/machine/ \
+          $MACHINE:/docker_machine && \
+          docker run --name elasticsearch -d \
+            -p 9200:9200 \
+            -e "discovery.type=single-node" \
+            elasticsearch:7.9.2 && \
+          docker run --name postgres -d \
+            -p 5432:5432 \
+            --net host \
+            -e POSTGRES_PASSWORD=password \
+            -v /docker_machine/machine:/root/.docker/machine \
+            -e DOCKER_MACHINE=$MACHINE \
+            postgres && \
+          sleep 4 && \
+          docker exec -i postgres psql -U postgres -c "CREATE DATABASE haystack;" && \
+          docker run --name runner -d \
+            --gpus all \
+            -v /docker_machine/machine:/root/.docker/machine \
+            --net host \
+            -e DOCKER_MACHINE=$MACHINE \
+            -e repo_token=$repo_token \
+            -e RUNNER_LABELS=$RUNNER_LABELS \
+            -e RUNNER_REPO=$RUNNER_REPO \
+            -e RUNNER_IDLE_TIMEOUT=120 \
+            dvcorg/cml-py3:latest && \
+          sleep 20 && echo "Deployed $MACHINE"
+          ) || (echo "Shut down machine" && docker-machine rm -y -f $MACHINE && exit 1)
+
+  run-benchmark:
+    if: ${{ (github.event.action == 'labeled' && github.event.label.name == 'benchmark') || github.event.action == 'workflow_dispatch' }}
+    needs: deploy-cloud-runner
+    runs-on: [self-hosted,cml]
+    steps:
+      - uses: actions/checkout@v2
+      - name: cml_run
+        env:
+          repo_token: ${{ secrets.HAYSTACK_BOT_TOKEN }}
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_CI_ACCESS_KEY }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_CI_SECRET_ACCESS_KEY }}
+        run: |
+          apt-get update -y
+          apt-get install python3-dev -y
+          pip install -r requirements.txt
+          pip install .
+          cd test/benchmarks && python run.py --retriever_index --retriever_query --reader --ci --save_markdown
+          echo -en "## Benchmarks: Retriever Indexing\n" >> report.md
+          cat retriever_index_results.md >> report.md
+          echo -en "\n\n## Benchmarks: Retriever Querying\n" >> report.md
+          cat retriever_query_results.md >> report.md
+          echo -en "\n\n## Benchmarks: Reader\n" >> report.md
+          cat reader_results.md >> report.md
+          cml-send-comment report.md
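A minimal Python sketch (not part of the commit) of what the cml_run step's final shell lines do: stitch the three per-benchmark markdown tables into report.md, which cml-send-comment then posts back to the pull request. It assumes the three result files already exist in the working directory, as they do after the run.py call above.

    from pathlib import Path

    # (heading, results file) pairs, matching the echo/cat lines in the workflow
    sections = [
        ("## Benchmarks: Retriever Indexing", "retriever_index_results.md"),
        ("## Benchmarks: Retriever Querying", "retriever_query_results.md"),
        ("## Benchmarks: Reader", "reader_results.md"),
    ]

    with open("report.md", "w") as report:
        for i, (heading, results_file) in enumerate(sections):
            if i:
                report.write("\n\n")  # blank line between sections
            report.write(heading + "\n")
            report.write(Path(results_file).read_text())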
@@ -28,17 +28,24 @@ def eval_data_from_file(filename: str, max_docs: Union[int, bool]=None) -> Tuple
     :param max_docs: This sets the number of documents that will be loaded. By default, this is set to None, thus reading in all available eval documents.
     :return: (List of Documents, List of Labels)
     """
-    docs = []
+    docs: List[Document] = []
     labels = []
 
     with open(filename, "r") as file:
         data = json.load(file)
         if "title" not in data["data"][0]:
            logger.warning(f"No title information found for documents in QA file: {filename}")
-        for document in data["data"][:max_docs]:
+        for document in data["data"]:
+            if max_docs:
+                if len(docs) > max_docs:
+                    break
            # get all extra fields from document level (e.g. title)
            meta_doc = {k: v for k, v in document.items() if k not in ("paragraphs", "title")}
            for paragraph in document["paragraphs"]:
+                if max_docs:
+                    if len(docs) > max_docs:
+                        break
                cur_meta = {"name": document.get("title", None)}
                # all other fields from paragraph level
                meta_paragraph = {k: v for k, v in paragraph.items() if k not in ("qas", "context")}
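A self-contained toy illustration (invented data, not from the commit) of the new early-break behaviour: the cap is now checked inside both the document and the paragraph loop, instead of only slicing the top-level list with data["data"][:max_docs]. In the real function each paragraph becomes a Document; here plain dicts stand in for them.

    max_docs = 2
    data = {"data": [{"title": "a", "paragraphs": [{}, {}, {}]},
                     {"title": "b", "paragraphs": [{}]}]}

    docs = []
    for document in data["data"]:
        if max_docs and len(docs) > max_docs:
            break
        for paragraph in document["paragraphs"]:
            if max_docs and len(docs) > max_docs:
                break
            docs.append(paragraph)
    print(len(docs))  # 3: collection stops once the cap is exceeded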
@@ -36,7 +36,7 @@
       "n_docs_options": [
         1000
       ],
-      "n_queries": 10
+      "n_queries": 100
     }
   },
   "filenames": {
@@ -11,7 +11,9 @@ import logging
 
 logger = logging.getLogger(__name__)
 
-reader_models_full = ["deepset/roberta-base-squad2"]
+reader_models_full = ["deepset/roberta-base-squad2", "deepset/minilm-uncased-squad2",
+                      "deepset/bert-base-cased-squad2", "deepset/bert-large-uncased-whole-word-masking-squad2",
+                      "deepset/xlm-roberta-large-squad2", "distilbert-base-uncased-distilled-squad"]
 reader_models_ci = ["deepset/minilm-uncased-squad2"]
 
 reader_types = ["farm"]
@@ -32,18 +34,13 @@ label_index = "label"
 def benchmark_reader(ci=False, update_json=False, save_markdown=False, **kwargs):
     if ci:
         reader_models = reader_models_ci
-        max_docs = 100
-        # heuristic to estimate num of passages for the reduced num of docs
-        n_passages = n_total_passages * (max_docs / n_total_docs)
     else:
         reader_models = reader_models_full
-        max_docs = None
-        n_passages = n_total_passages
     reader_results = []
     doc_store = get_document_store("elasticsearch")
     # download squad data
     _download_extract_downstream_data(input_file=data_dir/filename)
-    docs, labels = eval_data_from_file(data_dir/filename, max_docs)
+    docs, labels = eval_data_from_file(data_dir/filename, max_docs=None)
 
     index_to_doc_store(doc_store, docs, None, labels)
     for reader_name in reader_models:
@@ -55,9 +52,8 @@ def benchmark_reader(ci=False, update_json=False, save_markdown=False, **kwargs)
                                       doc_index=doc_index,
                                       label_index=label_index,
                                       device="cuda")
-                # results = reader.eval_on_file(data_dir, filename, device="cuda")
-                print(results)
-                results["passages_per_second"] = n_passages / results["reader_time"]
+                # print(results)
+                results["passages_per_second"] = n_total_passages / results["reader_time"]
                 results["reader"] = reader_name
                 results["error"] = ""
                 reader_results.append(results)
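For orientation, a toy computation (values invented, not from the commit) of the metric this hunk touches: with the per-CI passage estimate removed above, reader throughput is always normalized by the full passage count.

    n_total_passages = 12_000  # illustrative value only
    reader_time = 400.0        # seconds reported by reader.eval(), illustrative
    passages_per_second = n_total_passages / reader_time
    print(f"{passages_per_second:.1f} passages/s")  # -> 30.0 passages/s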
@@ -78,11 +74,12 @@ def benchmark_reader(ci=False, update_json=False, save_markdown=False, **kwargs)
         md_file = results_file.replace(".csv", ".md")
         with open(md_file, "w") as f:
             f.write(str(reader_df.to_markdown()))
     doc_store.delete_all_documents(label_index)
     doc_store.delete_all_documents(doc_index)
     if update_json:
         populate_reader_json()
 
+
 def populate_reader_json():
     reader_results = reader_json()
     template = READER_TEMPLATE
@@ -91,4 +88,4 @@ def populate_reader_json():
 
 
 if __name__ == "__main__":
-    benchmark_reader(ci=False, update_json=False, save_markdown=False)
+    benchmark_reader(ci=True, update_json=True, save_markdown=True)
@@ -36,7 +36,7 @@ speed_json = "../../docs/_src/benchmarks/retriever_speed.json"
 seed = 42
 random.seed(42)
 
-def benchmark_indexing(n_docs_options, retriever_doc_stores, data_dir, filename_gold, filename_negative, data_s3_url, embeddings_filenames, embeddings_dir, update_json, **kwargs):
+def benchmark_indexing(n_docs_options, retriever_doc_stores, data_dir, filename_gold, filename_negative, data_s3_url, embeddings_filenames, embeddings_dir, update_json, save_markdown, **kwargs):
 
     retriever_results = []
     for n_docs in n_docs_options:
@@ -73,14 +73,18 @@ def benchmark_indexing(n_docs_options, retriever_doc_stores, data_dir, filename_
             retriever_df.to_csv(index_results_file)
             doc_store.delete_all_documents(index=doc_index)
             doc_store.delete_all_documents(index=label_index)
+            if save_markdown:
+                md_file = index_results_file.replace(".csv", ".md")
+                with open(md_file, "w") as f:
+                    f.write(str(retriever_df.to_markdown()))
             time.sleep(10)
             del doc_store
             del retriever
 
-        except Exception as e:
+        except Exception:
             tb = traceback.format_exc()
-            logging.ERROR(f"##### The following Error was raised while running indexing run: {retriever_name}, {doc_store_name}, {n_docs} docs #####")
-            logging.Error(tb)
+            logging.error(f"##### The following Error was raised while running indexing run: {retriever_name}, {doc_store_name}, {n_docs} docs #####")
+            logging.error(tb)
             retriever_results.append({
                 "retriever": retriever_name,
                 "doc_store": doc_store_name,
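Why the lowercase fix matters (standalone sketch, not from the commit): logging.ERROR is the integer level constant 40, not a function, so calling it raised a TypeError inside the except block, and logging.Error does not exist at all, so the traceback never reached the log. logging.error() is the actual module-level call.

    import logging
    import traceback

    try:
        raise ValueError("simulated benchmark failure")
    except Exception:
        tb = traceback.format_exc()
        # logging.ERROR("...")  -> TypeError: 'int' object is not callable
        # logging.Error("...")  -> AttributeError: module 'logging' has no attribute 'Error'
        logging.error("##### Error raised during indexing run #####")
        logging.error(tb)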
@@ -109,6 +113,7 @@ def benchmark_querying(n_docs_options,
                        embeddings_filenames,
                        embeddings_dir,
                        update_json,
+                       save_markdown,
                        **kwargs):
     """ Benchmark the time it takes to perform querying. Doc embeddings are loaded from file."""
     retriever_results = []
@@ -155,10 +160,10 @@ def benchmark_querying(n_docs_options,
             time.sleep(5)
             del doc_store
             del retriever
-        except Exception as e:
+        except Exception:
             tb = traceback.format_exc()
-            logging.ERROR(f"##### The following Error was raised while running querying run: {retriever_name}, {doc_store_name}, {n_docs} docs #####")
-            logging.Error(tb)
+            logging.error(f"##### The following Error was raised while running querying run: {retriever_name}, {doc_store_name}, {n_docs} docs #####")
+            logging.error(tb)
             results = {
                 "retriever": retriever_name,
                 "doc_store": doc_store_name,
@@ -184,6 +189,10 @@ def benchmark_querying(n_docs_options,
     retriever_df = pd.DataFrame.from_records(retriever_results)
     retriever_df = retriever_df.sort_values(by="retriever").sort_values(by="doc_store")
     retriever_df.to_csv(query_results_file)
+    if save_markdown:
+        md_file = query_results_file.replace(".csv", ".md")
+        with open(md_file, "w") as f:
+            f.write(str(retriever_df.to_markdown()))
     if update_json:
         populate_retriever_json()
 
@@ -281,6 +290,6 @@ def prepare_negative_passages(data_dir, filename_negative, n_docs):
 
 if __name__ == "__main__":
     params, filenames = load_config(config_filename="config.json", ci=True)
-    benchmark_indexing(**params, **filenames)
-    benchmark_querying(**params, **filenames)
+    benchmark_indexing(**params, **filenames, update_json=True, save_markdown=False)
+    benchmark_querying(**params, **filenames, update_json=True, save_markdown=False)
 
@@ -17,13 +17,14 @@ parser.add_argument('--ci', default=False, action="store_true",
                     help='Perform a smaller subset of benchmarks that are quicker to run')
 parser.add_argument('--update_json', default=False, action="store_true",
                     help='Update the json file with the results of this run so that the website can be updated')
-
+parser.add_argument('--save_markdown', default=False, action="store_true",
+                    help='Update the json file with the results of this run so that the website can be updated')
 args = parser.parse_args()
 
 if args.retriever_index:
-    benchmark_indexing(**params, **filenames, ci=args.ci, update_json=args.update_json)
+    benchmark_indexing(**params, **filenames, ci=args.ci, update_json=args.update_json, save_markdown=args.save_markdown)
 if args.retriever_query:
-    benchmark_querying(**params, **filenames, ci=args.ci, update_json=args.update_json)
+    benchmark_querying(**params, **filenames, ci=args.ci, update_json=args.update_json, save_markdown=args.save_markdown)
 if args.reader:
-    benchmark_reader(**params, **filenames, ci=args.ci, update_json=args.update_json)
+    benchmark_reader(**params, **filenames, ci=args.ci, update_json=args.update_json, save_markdown=args.save_markdown)
 
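A condensed sketch of run.py's flag handling after this change. Only the --ci, --update_json, and --save_markdown definitions appear in the hunk above; the other flags are inferred from the CI invocation (python run.py --retriever_index --retriever_query --reader --ci --save_markdown) and the help strings here are illustrative, not the committed ones.

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--retriever_index', default=False, action="store_true")
    parser.add_argument('--retriever_query', default=False, action="store_true")
    parser.add_argument('--reader', default=False, action="store_true")
    parser.add_argument('--ci', default=False, action="store_true")
    parser.add_argument('--update_json', default=False, action="store_true")
    parser.add_argument('--save_markdown', default=False, action="store_true",
                        help='Also write the benchmark tables as markdown files')
    args = parser.parse_args()
    print(args.save_markdown)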
@@ -18,8 +18,6 @@ logger = logging.getLogger(__name__)
 
-reader_models = ["deepset/roberta-base-squad2", "deepset/minilm-uncased-squad2", "deepset/bert-base-cased-squad2", "deepset/bert-large-uncased-whole-word-masking-squad2", "deepset/xlm-roberta-large-squad2"]
-reader_types = ["farm"]
 data_dir_reader = Path("../../data/squad20")
 filename_reader = "dev-v2.0.json"
 
 doc_index = "eval_document"
 label_index = "label"
@@ -43,18 +41,6 @@ def get_document_store(document_store_type, es_similarity='cosine'):
         index_type = "Flat"
     elif document_store_type == "faiss_hnsw":
         index_type = "HNSW"
-
-    #TEMP FIX for issue with deleting docs
-    # status = subprocess.run(
-    #     ['docker rm -f haystack-postgres'],
-    #     shell=True)
-    # time.sleep(3)
-    # try:
-    #     document_store = FAISSDocumentStore(sql_url="postgresql://postgres:password@localhost:5432/haystack",
-    #                                         faiss_index_factory_str=index_type)
-    # except:
-    #     # Launch a postgres instance & create empty DB
-    #     logger.info("Didn't find Postgres. Start a new instance...")
     status = subprocess.run(
         ['docker rm -f haystack-postgres'],
         shell=True)
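A standalone equivalent (illustrative, not from the commit) of the cleanup call this hunk leaves active: force-remove any stale haystack-postgres container before a run. Note that with shell=True Python uses only the first element of an argument list as the shell command string, so passing a single string behaves the same and is the more conventional spelling.

    import subprocess

    status = subprocess.run("docker rm -f haystack-postgres", shell=True)
    print(status.returncode)  # non-zero when no such container was running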