diff --git a/test_unstructured_ingest/metrics/aggregate-scores-cct.tsv b/test_unstructured_ingest/metrics/aggregate-scores-cct.tsv index 971d0c650..512af212e 100644 --- a/test_unstructured_ingest/metrics/aggregate-scores-cct.tsv +++ b/test_unstructured_ingest/metrics/aggregate-scores-cct.tsv @@ -1,3 +1,3 @@ strategy average sample_sd population_sd count -cct-accuracy 0.735 0.069 0.048 2 -cct-%missing 0.086 0.069 0.049 2 +cct-accuracy 0.761 0.295 0.28 10 +cct-%missing 0.035 0.037 0.035 10 diff --git a/test_unstructured_ingest/metrics/all-docs-cct.tsv b/test_unstructured_ingest/metrics/all-docs-cct.tsv index 35124f7f1..0db486034 100644 --- a/test_unstructured_ingest/metrics/all-docs-cct.tsv +++ b/test_unstructured_ingest/metrics/all-docs-cct.tsv @@ -1,3 +1,11 @@ filename doctype connector cct-accuracy cct-%missing +Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf pdf azure 0.981 0.007 IRS-form-1987.pdf pdf azure 0.783 0.135 +spring-weather.html html azure 0.0 0.018 +wiki_movie_plots_small.csv csv s3-minio 0.979 0.03 example-10k.html html local 0.686 0.037 +ideas-page.html html local 0.929 0.033 +fake-html-cp1252.html html local 0.659 0.0 +layout-parser-paper.pdf pdf local-single-file-with-pdf-infer-table-structure 0.945 0.029 +layout-parser-paper-with-table.jpg jpg local-single-file-with-pdf-infer-table-structure 0.716 0.032 +ideas-page.html html gcs 0.929 0.033 diff --git a/test_unstructured_ingest/metrics/metrics-json-manifest.txt b/test_unstructured_ingest/metrics/metrics-json-manifest.txt index 5f0b75252..9ac13aaa1 100644 --- a/test_unstructured_ingest/metrics/metrics-json-manifest.txt +++ b/test_unstructured_ingest/metrics/metrics-json-manifest.txt @@ -1,4 +1,18 @@ handbook-1p.docx.json example-10k.html.json IRS-form-1987.pdf.json -science-exploration-1p.pptx.json \ No newline at end of file +science-exploration-1p.pptx.json +Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf.json +spring-weather.html.json +ideas-page.html.json +fake-text.txt.json +stanley-cups.xlsx.json +UDHR_first_article_all.txt.json +fake-html-cp1252.html.json +layout-parser-paper-with-table.jpg.json +layout-parser-paper.pdf.json +2023-Jan-economic-outlook.pdf.json +page-with-formula.pdf.json +recalibrating-risk-report.pdf.json +Silent-Giant-(1).pdf.json +wiki_movie_plots_small.csv.json \ No newline at end of file