From c471ea3cc7c167f1328256ef1719f6cca4849cad Mon Sep 17 00:00:00 2001 From: Klaijan Date: Sat, 4 Nov 2023 13:58:56 -0400 Subject: [PATCH] chore: remove copy line from non-matrix connectors (#1976) --- test_unstructured_ingest/metrics/aggregate-scores-cct.tsv | 2 +- test_unstructured_ingest/metrics/all-docs-cct.tsv | 8 +++----- .../metrics/metrics-json-manifest.txt | 1 + test_unstructured_ingest/src/against-api.sh | 2 -- test_unstructured_ingest/src/airtable-diff.sh | 2 -- test_unstructured_ingest/src/biomed-api.sh | 2 -- test_unstructured_ingest/src/biomed-path.sh | 2 -- test_unstructured_ingest/src/box.sh | 2 -- test_unstructured_ingest/src/confluence-diff.sh | 2 -- test_unstructured_ingest/src/delta-table.sh | 2 -- test_unstructured_ingest/src/discord.sh | 2 -- test_unstructured_ingest/src/dropbox.sh | 2 -- test_unstructured_ingest/src/elasticsearch.sh | 2 -- test_unstructured_ingest/src/embed.sh | 2 -- test_unstructured_ingest/src/github.sh | 2 -- test_unstructured_ingest/src/gitlab.sh | 2 -- test_unstructured_ingest/src/jira.sh | 2 -- test_unstructured_ingest/src/notion.sh | 2 -- test_unstructured_ingest/src/onedrive.sh | 2 -- test_unstructured_ingest/src/outlook.sh | 2 -- test_unstructured_ingest/src/pdf-fast-reprocess.sh | 2 -- test_unstructured_ingest/src/salesforce.sh | 2 -- test_unstructured_ingest/src/slack.sh | 2 -- test_unstructured_ingest/src/wikipedia.sh | 2 -- 24 files changed, 5 insertions(+), 48 deletions(-) diff --git a/test_unstructured_ingest/metrics/aggregate-scores-cct.tsv b/test_unstructured_ingest/metrics/aggregate-scores-cct.tsv index e362972a4..0bba1b63a 100644 --- a/test_unstructured_ingest/metrics/aggregate-scores-cct.tsv +++ b/test_unstructured_ingest/metrics/aggregate-scores-cct.tsv @@ -1,3 +1,3 @@ strategy average sample_sd population_sd count cct-accuracy 0.798 0.083 0.072 4 -cct-%missing 0.089 0.04 0.035 4 +cct-%missing 0.089 0.04 0.035 4 \ No newline at end of file diff --git a/test_unstructured_ingest/metrics/all-docs-cct.tsv b/test_unstructured_ingest/metrics/all-docs-cct.tsv index 69ffeaaab..3efe86ea5 100644 --- a/test_unstructured_ingest/metrics/all-docs-cct.tsv +++ b/test_unstructured_ingest/metrics/all-docs-cct.tsv @@ -1,5 +1,3 @@ -filename doctype connector cct-accuracy cct-%missing -science-exploration-1p.pptx pptx dropbox 0.861 0.093 -science-exploration-1p.pptx pptx box 0.861 0.093 -example-10k.html html local 0.686 0.037 -IRS-form-1987.pdf pdf azure 0.783 0.135 +filename connector cct-accuracy cct-%missing +example-10k.html local 0.686 0.037 +IRS-form-1987.pdf azure 0.783 0.135 \ No newline at end of file diff --git a/test_unstructured_ingest/metrics/metrics-json-manifest.txt b/test_unstructured_ingest/metrics/metrics-json-manifest.txt index 096cd3259..5f0b75252 100644 --- a/test_unstructured_ingest/metrics/metrics-json-manifest.txt +++ b/test_unstructured_ingest/metrics/metrics-json-manifest.txt @@ -1,3 +1,4 @@ +handbook-1p.docx.json example-10k.html.json IRS-form-1987.pdf.json science-exploration-1p.pptx.json \ No newline at end of file diff --git a/test_unstructured_ingest/src/against-api.sh b/test_unstructured_ingest/src/against-api.sh index cab0331b7..cca75ccbd 100755 --- a/test_unstructured_ingest/src/against-api.sh +++ b/test_unstructured_ingest/src/against-api.sh @@ -40,5 +40,3 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ --work-dir "$WORK_DIR" "$SCRIPT_DIR"/check-num-files-output.sh 1 $OUTPUT_FOLDER_NAME - -"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME" diff --git a/test_unstructured_ingest/src/airtable-diff.sh b/test_unstructured_ingest/src/airtable-diff.sh index abcf82286..1e0f9c267 100755 --- a/test_unstructured_ingest/src/airtable-diff.sh +++ b/test_unstructured_ingest/src/airtable-diff.sh @@ -51,5 +51,3 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ --verbose "$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME - -"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME" diff --git a/test_unstructured_ingest/src/biomed-api.sh b/test_unstructured_ingest/src/biomed-api.sh index 31d15b244..b41c6bfc6 100755 --- a/test_unstructured_ingest/src/biomed-api.sh +++ b/test_unstructured_ingest/src/biomed-api.sh @@ -42,5 +42,3 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ --work-dir "$WORK_DIR" "$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME - -"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME" diff --git a/test_unstructured_ingest/src/biomed-path.sh b/test_unstructured_ingest/src/biomed-path.sh index e8e7754bb..ceb200983 100755 --- a/test_unstructured_ingest/src/biomed-path.sh +++ b/test_unstructured_ingest/src/biomed-path.sh @@ -40,5 +40,3 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ --work-dir "$WORK_DIR" "$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME - -"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME" diff --git a/test_unstructured_ingest/src/box.sh b/test_unstructured_ingest/src/box.sh index 300afeb2f..7efb02d5f 100755 --- a/test_unstructured_ingest/src/box.sh +++ b/test_unstructured_ingest/src/box.sh @@ -54,5 +54,3 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ --work-dir "$WORK_DIR" "$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME - -"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME" diff --git a/test_unstructured_ingest/src/confluence-diff.sh b/test_unstructured_ingest/src/confluence-diff.sh index 7eee893e1..066bde029 100755 --- a/test_unstructured_ingest/src/confluence-diff.sh +++ b/test_unstructured_ingest/src/confluence-diff.sh @@ -48,5 +48,3 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ --work-dir "$WORK_DIR" "$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME - -"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME" diff --git a/test_unstructured_ingest/src/delta-table.sh b/test_unstructured_ingest/src/delta-table.sh index 13b9e5813..e6286228e 100755 --- a/test_unstructured_ingest/src/delta-table.sh +++ b/test_unstructured_ingest/src/delta-table.sh @@ -45,5 +45,3 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ --work-dir "$WORK_DIR" "$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME - -"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME" diff --git a/test_unstructured_ingest/src/discord.sh b/test_unstructured_ingest/src/discord.sh index f11ccbeea..34587a740 100755 --- a/test_unstructured_ingest/src/discord.sh +++ b/test_unstructured_ingest/src/discord.sh @@ -44,5 +44,3 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ --work-dir "$WORK_DIR" "$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME - -"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME" diff --git a/test_unstructured_ingest/src/dropbox.sh b/test_unstructured_ingest/src/dropbox.sh index 92dafeb79..45583a226 100755 --- a/test_unstructured_ingest/src/dropbox.sh +++ b/test_unstructured_ingest/src/dropbox.sh @@ -51,5 +51,3 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ "$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME - -"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME" diff --git a/test_unstructured_ingest/src/elasticsearch.sh b/test_unstructured_ingest/src/elasticsearch.sh index 9148921c3..d85c4d160 100755 --- a/test_unstructured_ingest/src/elasticsearch.sh +++ b/test_unstructured_ingest/src/elasticsearch.sh @@ -51,5 +51,3 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ --work-dir "$WORK_DIR" "$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME - -"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME" diff --git a/test_unstructured_ingest/src/embed.sh b/test_unstructured_ingest/src/embed.sh index 2faef2e2a..a370a7885 100755 --- a/test_unstructured_ingest/src/embed.sh +++ b/test_unstructured_ingest/src/embed.sh @@ -37,5 +37,3 @@ set +e # once we have an alternative encoder that is deterministic, we test the diff here # until then just validating the file was created "$SCRIPT_DIR"/check-num-files-output.sh 1 "$OUTPUT_FOLDER_NAME" - -"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME" diff --git a/test_unstructured_ingest/src/github.sh b/test_unstructured_ingest/src/github.sh index 37b6dd934..944ad5acd 100755 --- a/test_unstructured_ingest/src/github.sh +++ b/test_unstructured_ingest/src/github.sh @@ -55,5 +55,3 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ $ACCESS_TOKEN_FLAGS "$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME - -"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME" diff --git a/test_unstructured_ingest/src/gitlab.sh b/test_unstructured_ingest/src/gitlab.sh index f5cfa1a95..6c4019384 100755 --- a/test_unstructured_ingest/src/gitlab.sh +++ b/test_unstructured_ingest/src/gitlab.sh @@ -41,5 +41,3 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ --work-dir "$WORK_DIR" "$SCRIPT_DIR"/check-num-files-output.sh 2 $OUTPUT_FOLDER_NAME - -"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME" diff --git a/test_unstructured_ingest/src/jira.sh b/test_unstructured_ingest/src/jira.sh index 71e9683a0..bc1dd2eb5 100755 --- a/test_unstructured_ingest/src/jira.sh +++ b/test_unstructured_ingest/src/jira.sh @@ -71,5 +71,3 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ "$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME - -"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME" diff --git a/test_unstructured_ingest/src/notion.sh b/test_unstructured_ingest/src/notion.sh index a92350b32..3c047b9a6 100755 --- a/test_unstructured_ingest/src/notion.sh +++ b/test_unstructured_ingest/src/notion.sh @@ -45,5 +45,3 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ "$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME - -"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME" diff --git a/test_unstructured_ingest/src/onedrive.sh b/test_unstructured_ingest/src/onedrive.sh index a9d2d3bc9..76ea58854 100755 --- a/test_unstructured_ingest/src/onedrive.sh +++ b/test_unstructured_ingest/src/onedrive.sh @@ -49,5 +49,3 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ --work-dir "$WORK_DIR" "$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME - -"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME" diff --git a/test_unstructured_ingest/src/outlook.sh b/test_unstructured_ingest/src/outlook.sh index 47745a21f..75443f5c7 100755 --- a/test_unstructured_ingest/src/outlook.sh +++ b/test_unstructured_ingest/src/outlook.sh @@ -49,5 +49,3 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ "$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME - -"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME" diff --git a/test_unstructured_ingest/src/pdf-fast-reprocess.sh b/test_unstructured_ingest/src/pdf-fast-reprocess.sh index 89aa0c6d3..fb7adc680 100755 --- a/test_unstructured_ingest/src/pdf-fast-reprocess.sh +++ b/test_unstructured_ingest/src/pdf-fast-reprocess.sh @@ -45,5 +45,3 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ "$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME - -"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME" diff --git a/test_unstructured_ingest/src/salesforce.sh b/test_unstructured_ingest/src/salesforce.sh index e9172f467..64d82355b 100755 --- a/test_unstructured_ingest/src/salesforce.sh +++ b/test_unstructured_ingest/src/salesforce.sh @@ -61,5 +61,3 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ --work-dir "$WORK_DIR" "$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME - -"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME" diff --git a/test_unstructured_ingest/src/slack.sh b/test_unstructured_ingest/src/slack.sh index f843f708f..c9e113f2b 100755 --- a/test_unstructured_ingest/src/slack.sh +++ b/test_unstructured_ingest/src/slack.sh @@ -47,5 +47,3 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ --work-dir "$WORK_DIR" "$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME - -"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME" diff --git a/test_unstructured_ingest/src/wikipedia.sh b/test_unstructured_ingest/src/wikipedia.sh index c03279fcc..989117f77 100755 --- a/test_unstructured_ingest/src/wikipedia.sh +++ b/test_unstructured_ingest/src/wikipedia.sh @@ -38,5 +38,3 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ --work-dir "$WORK_DIR" "$SCRIPT_DIR"/check-num-files-output.sh 3 $OUTPUT_FOLDER_NAME - -"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME"