diff --git a/.github/workflows/cml.yaml b/.github/workflows/cml.yaml
new file mode 100644
index 000000000..7badc8fc1
--- /dev/null
+++ b/.github/workflows/cml.yaml
@@ -0,0 +1,90 @@
+name: benchmarks
+
+on:
+  workflow_dispatch:
+  pull_request:
+    types: [labeled]
+jobs:
+  deploy-cloud-runner:
+    if: ${{ (github.event.action == 'labeled' && github.event.label.name == 'benchmark') || github.event_name == 'workflow_dispatch' }}
+    runs-on: [ubuntu-latest]
+    container: docker://dvcorg/cml
+    steps:
+      - name: deploy
+        env:
+          repo_token: ${{ secrets.HAYSTACK_BOT_TOKEN }}
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_CI_ACCESS_KEY }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_CI_SECRET_ACCESS_KEY }}
+          VPC: ${{ secrets.AWS_CI_VPC }}
+        run: |
+          echo "Deploying..."
+          RUNNER_LABELS="cml,aws"
+          RUNNER_REPO="https://github.com/${GITHUB_REPOSITORY}"
+          MACHINE="cml$(date +%s)"
+          docker-machine create \
+              --driver amazonec2 \
+              --amazonec2-instance-type p3.2xlarge \
+              --amazonec2-vpc-id $VPC \
+              --amazonec2-region us-east-1 \
+              --amazonec2-zone c \
+              --amazonec2-ssh-user ubuntu \
+              --amazonec2-ami ami-06a25ee8966373068 \
+              --amazonec2-root-size 150 \
+              $MACHINE
+          eval "$(docker-machine env --shell sh $MACHINE)"
+
+          (
+          docker-machine ssh $MACHINE "sudo mkdir -p \
+          /docker_machine && \
+          sudo chmod 777 /docker_machine" && \
+          docker-machine scp -r -q ~/.docker/machine/ \
+          $MACHINE:/docker_machine && \
+          docker run --name elasticsearch -d \
+          -p 9200:9200 \
+          -e "discovery.type=single-node" \
+          elasticsearch:7.9.2 && \
+          docker run --name postgres -d \
+          -p 5432:5432 \
+          --net host \
+          -e POSTGRES_PASSWORD=password \
+          -v /docker_machine/machine:/root/.docker/machine \
+          -e DOCKER_MACHINE=$MACHINE \
+          postgres && \
+          sleep 4 && \
+          docker exec -i postgres psql -U postgres -c "CREATE DATABASE haystack;" && \
+          docker run --name runner -d \
+          --gpus all \
+          -v /docker_machine/machine:/root/.docker/machine \
+          --net host \
+          -e DOCKER_MACHINE=$MACHINE \
+          -e repo_token=$repo_token \
+          -e RUNNER_LABELS=$RUNNER_LABELS \
+          -e RUNNER_REPO=$RUNNER_REPO \
+          -e RUNNER_IDLE_TIMEOUT=120 \
+          dvcorg/cml-py3:latest && \
+          sleep 20 && echo "Deployed $MACHINE"
+          ) || (echo "Shut down machine" && docker-machine rm -y -f $MACHINE && exit 1)
+  run-benchmark:
+    if: ${{ (github.event.action == 'labeled' && github.event.label.name == 'benchmark') || github.event_name == 'workflow_dispatch' }}
+    needs: deploy-cloud-runner
+    runs-on: [self-hosted,cml]
+    steps:
+      - uses: actions/checkout@v2
+      - name: cml_run
+        env:
+          repo_token: ${{ secrets.HAYSTACK_BOT_TOKEN }}
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_CI_ACCESS_KEY }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_CI_SECRET_ACCESS_KEY }}
+        run: |
+          apt-get update -y
+          apt-get install python3-dev -y
+          pip install -r requirements.txt
+          pip install .
+          cd test/benchmarks && python run.py --retriever_index --retriever_query --reader --ci --save_markdown
+          echo -en "## Benchmarks: Retriever Indexing\n" >> report.md
+          cat retriever_index_results.md >> report.md
+          echo -en "\n\n## Benchmarks: Retriever Querying\n" >> report.md
+          cat retriever_query_results.md >> report.md
+          echo -en "\n\n## Benchmarks: Reader\n" >> report.md
+          cat reader_results.md >> report.md
+          cml-send-comment report.md
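The `run` step above concatenates the per-benchmark markdown files into a single `report.md` before `cml-send-comment` posts it on the PR. A minimal Python sketch of that assembly step, assuming the three `*_results.md` files were produced by `run.py --save_markdown`:

```python
# Sketch: assemble report.md from the per-benchmark markdown files,
# mirroring the shell pipeline in the workflow above.
from pathlib import Path

sections = [
    ("## Benchmarks: Retriever Indexing", "retriever_index_results.md"),
    ("## Benchmarks: Retriever Querying", "retriever_query_results.md"),
    ("## Benchmarks: Reader", "reader_results.md"),
]

with open("report.md", "w") as report:
    for heading, filename in sections:
        report.write(f"{heading}\n")
        report.write(Path(filename).read_text())
        report.write("\n\n")
```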
diff --git a/haystack/preprocessor/utils.py b/haystack/preprocessor/utils.py
index e57743e47..47216761d 100644
--- a/haystack/preprocessor/utils.py
+++ b/haystack/preprocessor/utils.py
@@ -28,17 +28,24 @@ def eval_data_from_file(filename: str, max_docs: Union[int, bool]=None) -> Tuple
     :param max_docs: This sets the number of documents that will be loaded. By default, this is set to None, thus reading in all available eval documents.
     :return: (List of Documents, List of Labels)
     """
-    docs = []
+
+    docs: List[Document] = []
     labels = []
 
     with open(filename, "r") as file:
         data = json.load(file)
         if "title" not in data["data"][0]:
             logger.warning(f"No title information found for documents in QA file: {filename}")
-        for document in data["data"][:max_docs]:
+        for document in data["data"]:
+            if max_docs:
+                if len(docs) >= max_docs:
+                    break
             # get all extra fields from document level (e.g. title)
             meta_doc = {k: v for k, v in document.items() if k not in ("paragraphs", "title")}
             for paragraph in document["paragraphs"]:
+                if max_docs:
+                    if len(docs) >= max_docs:
+                        break
                 cur_meta = {"name": document.get("title", None)}
                 # all other fields from paragraph level
                 meta_paragraph = {k: v for k, v in paragraph.items() if k not in ("qas", "context")}
diff --git a/test/benchmarks/config.json b/test/benchmarks/config.json
index d214b294b..c352fbcad 100644
--- a/test/benchmarks/config.json
+++ b/test/benchmarks/config.json
@@ -36,7 +36,7 @@
       "n_docs_options": [
         1000
       ],
-      "n_queries": 10
+      "n_queries": 100
     }
   },
   "filenames": {
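With this change, `max_docs` caps the number of `Document` objects produced (one per SQuAD paragraph) instead of slicing whole top-level SQuAD entries, each of which can contain many paragraphs. A sketch of a capped load; the file path is illustrative:

```python
# Illustrative path; point this at any SQuAD-style eval file.
from haystack.preprocessor.utils import eval_data_from_file

docs, labels = eval_data_from_file("data/squad20/dev-v2.0.json", max_docs=100)
# One Document per paragraph, so the cap applies to paragraphs,
# not to top-level SQuAD entries.
assert len(docs) <= 100
```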
diff --git a/test/benchmarks/reader.py b/test/benchmarks/reader.py
index 83689e287..41308a106 100644
--- a/test/benchmarks/reader.py
+++ b/test/benchmarks/reader.py
@@ -11,7 +11,9 @@ import logging
 
 logger = logging.getLogger(__name__)
 
-reader_models_full = ["deepset/roberta-base-squad2"]
+reader_models_full = ["deepset/roberta-base-squad2", "deepset/minilm-uncased-squad2",
+                      "deepset/bert-base-cased-squad2", "deepset/bert-large-uncased-whole-word-masking-squad2",
+                      "deepset/xlm-roberta-large-squad2", "distilbert-base-uncased-distilled-squad"]
 reader_models_ci = ["deepset/minilm-uncased-squad2"]
 
 reader_types = ["farm"]
@@ -32,18 +34,13 @@ label_index = "label"
 def benchmark_reader(ci=False, update_json=False, save_markdown=False, **kwargs):
     if ci:
         reader_models = reader_models_ci
-        max_docs = 100
-        # heuristic to estimate num of passages for the reduced num of docs
-        n_passages = n_total_passages * (max_docs / n_total_docs)
     else:
         reader_models = reader_models_full
-        max_docs = None
-        n_passages = n_total_passages
     reader_results = []
     doc_store = get_document_store("elasticsearch")
     # download squad data
     _download_extract_downstream_data(input_file=data_dir/filename)
-    docs, labels = eval_data_from_file(data_dir/filename, max_docs)
+    docs, labels = eval_data_from_file(data_dir/filename, max_docs=None)
 
     index_to_doc_store(doc_store, docs, None, labels)
     for reader_name in reader_models:
@@ -55,9 +52,8 @@ def benchmark_reader(ci=False, update_json=False, save_markdown=False, **kwargs)
                               doc_index=doc_index,
                               label_index=label_index,
                               device="cuda")
-        # results = reader.eval_on_file(data_dir, filename, device="cuda")
-        print(results)
-        results["passages_per_second"] = n_passages / results["reader_time"]
+        # print(results)
+        results["passages_per_second"] = n_total_passages / results["reader_time"]
         results["reader"] = reader_name
         results["error"] = ""
         reader_results.append(results)
@@ -78,11 +74,12 @@ def benchmark_reader(ci=False, update_json=False, save_markdown=False, **kwargs)
         md_file = results_file.replace(".csv", ".md")
         with open(md_file, "w") as f:
             f.write(str(reader_df.to_markdown()))
+    doc_store.delete_all_documents(label_index)
+    doc_store.delete_all_documents(doc_index)
 
     if update_json:
         populate_reader_json()
 
-
 def populate_reader_json():
     reader_results = reader_json()
     template = READER_TEMPLATE
@@ -91,4 +88,4 @@ def populate_reader_json():
 
 
 if __name__ == "__main__":
-    benchmark_reader(ci=False, update_json=False, save_markdown=False)
\ No newline at end of file
+    benchmark_reader(ci=True, update_json=True, save_markdown=True)
\ No newline at end of file
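Because the CI path now indexes the full dev set, throughput can be computed from the total passage count rather than the earlier heuristic estimate. The calculation above, restated with illustrative numbers (not measured results):

```python
# Illustrative values only, not measured results.
n_total_passages = 12_350          # passages indexed from the eval set
results = {"reader_time": 615.0}   # seconds, as reported by reader.eval

results["passages_per_second"] = n_total_passages / results["reader_time"]
print(f"{results['passages_per_second']:.1f} passages/sec")  # ~20.1
```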
diff --git a/test/benchmarks/retriever.py b/test/benchmarks/retriever.py
index 807acdc2e..b9e21b63c 100644
--- a/test/benchmarks/retriever.py
+++ b/test/benchmarks/retriever.py
@@ -36,7 +36,7 @@ speed_json = "../../docs/_src/benchmarks/retriever_speed.json"
 seed = 42
 random.seed(42)
 
-def benchmark_indexing(n_docs_options, retriever_doc_stores, data_dir, filename_gold, filename_negative, data_s3_url, embeddings_filenames, embeddings_dir, update_json, **kwargs):
+def benchmark_indexing(n_docs_options, retriever_doc_stores, data_dir, filename_gold, filename_negative, data_s3_url, embeddings_filenames, embeddings_dir, update_json, save_markdown, **kwargs):
 
     retriever_results = []
     for n_docs in n_docs_options:
@@ -73,14 +73,18 @@ def benchmark_indexing(n_docs_options, retriever_doc_stores, data_dir, filename_gold, filename_negative, data_s3_url, embeddings_filenames, embeddings_dir, update_json, **kwargs):
             retriever_df.to_csv(index_results_file)
             doc_store.delete_all_documents(index=doc_index)
             doc_store.delete_all_documents(index=label_index)
+            if save_markdown:
+                md_file = index_results_file.replace(".csv", ".md")
+                with open(md_file, "w") as f:
+                    f.write(str(retriever_df.to_markdown()))
             time.sleep(10)
             del doc_store
             del retriever
-        except Exception as e:
+        except Exception:
             tb = traceback.format_exc()
-            logging.ERROR(f"##### The following Error was raised while running indexing run: {retriever_name}, {doc_store_name}, {n_docs} docs #####")
-            logging.Error(tb)
+            logging.error(f"##### The following Error was raised while running indexing run: {retriever_name}, {doc_store_name}, {n_docs} docs #####")
+            logging.error(tb)
             retriever_results.append({
                 "retriever": retriever_name,
                 "doc_store": doc_store_name,
@@ -109,6 +113,7 @@ def benchmark_querying(n_docs_options,
                        embeddings_filenames,
                        embeddings_dir,
                        update_json,
+                       save_markdown,
                        **kwargs):
     """ Benchmark the time it takes to perform querying. Doc embeddings are loaded from file."""
     retriever_results = []
@@ -155,10 +160,10 @@ def benchmark_querying(n_docs_options,
             time.sleep(5)
             del doc_store
             del retriever
-        except Exception as e:
+        except Exception:
             tb = traceback.format_exc()
-            logging.ERROR(f"##### The following Error was raised while running querying run: {retriever_name}, {doc_store_name}, {n_docs} docs #####")
-            logging.Error(tb)
+            logging.error(f"##### The following Error was raised while running querying run: {retriever_name}, {doc_store_name}, {n_docs} docs #####")
+            logging.error(tb)
             results = {
                 "retriever": retriever_name,
                 "doc_store": doc_store_name,
@@ -184,6 +189,10 @@ def benchmark_querying(n_docs_options,
     retriever_df = pd.DataFrame.from_records(retriever_results)
     retriever_df = retriever_df.sort_values(by="retriever").sort_values(by="doc_store")
     retriever_df.to_csv(query_results_file)
+    if save_markdown:
+        md_file = query_results_file.replace(".csv", ".md")
+        with open(md_file, "w") as f:
+            f.write(str(retriever_df.to_markdown()))
 
     if update_json:
         populate_retriever_json()
@@ -281,6 +290,6 @@ def prepare_negative_passages(data_dir, filename_negative, n_docs):
 
 
 if __name__ == "__main__":
     params, filenames = load_config(config_filename="config.json", ci=True)
-    benchmark_indexing(**params, **filenames)
-    benchmark_querying(**params, **filenames)
+    benchmark_indexing(**params, **filenames, update_json=True, save_markdown=False)
+    benchmark_querying(**params, **filenames, update_json=True, save_markdown=False)
diff --git a/test/benchmarks/run.py b/test/benchmarks/run.py
index 65320a7d4..8c2d2a659 100644
--- a/test/benchmarks/run.py
+++ b/test/benchmarks/run.py
@@ -17,13 +17,14 @@ parser.add_argument('--ci', default=False, action="store_true",
                     help='Perform a smaller subset of benchmarks that are quicker to run')
 parser.add_argument('--update_json', default=False, action="store_true",
                     help='Update the json file with the results of this run so that the website can be updated')
-
+parser.add_argument('--save_markdown', default=False, action="store_true",
+                    help='Save the results of this run as markdown files')
 args = parser.parse_args()
 
 if args.retriever_index:
-    benchmark_indexing(**params, **filenames, ci=args.ci, update_json=args.update_json)
+    benchmark_indexing(**params, **filenames, ci=args.ci, update_json=args.update_json, save_markdown=args.save_markdown)
 if args.retriever_query:
-    benchmark_querying(**params, **filenames, ci=args.ci, update_json=args.update_json)
+    benchmark_querying(**params, **filenames, ci=args.ci, update_json=args.update_json, save_markdown=args.save_markdown)
 if args.reader:
-    benchmark_reader(**params, **filenames, ci=args.ci, update_json=args.update_json)
+    benchmark_reader(**params, **filenames, ci=args.ci, update_json=args.update_json, save_markdown=args.save_markdown)
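The new `save_markdown` branches reuse the DataFrame that is already written to CSV and serialise it with pandas' `to_markdown()`, which requires the optional `tabulate` package. A standalone sketch with made-up example rows:

```python
import pandas as pd

# Example rows; a real run appends one dict per benchmark configuration.
retriever_results = [
    {"retriever": "dpr", "doc_store": "faiss_flat", "n_docs": 1000, "index_time": 42.0},
    {"retriever": "elastic", "doc_store": "elasticsearch", "n_docs": 1000, "index_time": 3.1},
]

retriever_df = pd.DataFrame.from_records(retriever_results)
retriever_df = retriever_df.sort_values(by="retriever").sort_values(by="doc_store")

results_file = "retriever_index_results.csv"
retriever_df.to_csv(results_file)
# to_markdown() needs the `tabulate` package installed.
with open(results_file.replace(".csv", ".md"), "w") as f:
    f.write(str(retriever_df.to_markdown()))
```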
diff --git a/test/benchmarks/utils.py b/test/benchmarks/utils.py
index ff8e3abaf..568cefa48 100644
--- a/test/benchmarks/utils.py
+++ b/test/benchmarks/utils.py
@@ -18,8 +18,6 @@ logger = logging.getLogger(__name__)
 reader_models = ["deepset/roberta-base-squad2", "deepset/minilm-uncased-squad2", "deepset/bert-base-cased-squad2",
                  "deepset/bert-large-uncased-whole-word-masking-squad2", "deepset/xlm-roberta-large-squad2"]
 reader_types = ["farm"]
-data_dir_reader = Path("../../data/squad20")
-filename_reader = "dev-v2.0.json"
 
 doc_index = "eval_document"
 label_index = "label"
@@ -43,18 +41,6 @@ def get_document_store(document_store_type, es_similarity='cosine'):
         index_type = "Flat"
     elif document_store_type == "faiss_hnsw":
         index_type = "HNSW"
-
-    #TEMP FIX for issue with deleting docs
-    # status = subprocess.run(
-    #     ['docker rm -f haystack-postgres'],
-    #     shell=True)
-    # time.sleep(3)
-    # try:
-    #     document_store = FAISSDocumentStore(sql_url="postgresql://postgres:password@localhost:5432/haystack",
-    #                                         faiss_index_factory_str=index_type)
-    # except:
-    #     Launch a postgres instance & create empty DB
-    #     logger.info("Didn't find Postgres. Start a new instance...")
     status = subprocess.run(
         ['docker rm -f haystack-postgres'],
         shell=True)
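The deleted block was a commented-out fallback; `get_document_store` now always recreates the `haystack-postgres` container before constructing the store. A sketch of that flow, spelled out end to end; the `FAISSDocumentStore` import path and constructor arguments are taken from the removed comment and may differ in other Haystack versions:

```python
import subprocess
import time

# Import path as of the Haystack version this PR targets; adjust if needed.
from haystack.document_store.faiss import FAISSDocumentStore

# Recreate the Postgres container and an empty `haystack` database,
# mirroring the subprocess calls in get_document_store above.
subprocess.run(['docker rm -f haystack-postgres'], shell=True)
subprocess.run(
    ['docker run --name haystack-postgres -p 5432:5432 -e POSTGRES_PASSWORD=password -d postgres'],
    shell=True)
time.sleep(3)  # give Postgres a moment to accept connections
subprocess.run(
    ['docker exec haystack-postgres psql -U postgres -c "CREATE DATABASE haystack;"'],
    shell=True)

# Index type "Flat" or "HNSW", as selected by document_store_type above.
document_store = FAISSDocumentStore(
    sql_url="postgresql://postgres:password@localhost:5432/haystack",
    faiss_index_factory_str="HNSW")
```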