mirror of
https://github.com/allenai/olmocr.git
synced 2025-06-27 04:00:02 +00:00
12 lines
355 B
Bash
12 lines
355 B
Bash
![]() |
#!/bin/bash
#
# Runs scripts/pii_rule_comparison.py with a reference rule and a hypothesis
# rule over a fixed documents folder, then passes every file written to the
# output directory to `tinyhost`.
#
# The two rules use the same attribute
# (ft_lang_id_en_doc_v2__ft_lang_id_en_doc_v2__en:avg) and differ only in the
# threshold: >0.5 for the reference, >0.4 for the hypothesis.
#
# Usage: run from the repository root. The script path and the output
# directory are both relative to the current working directory.
#
# NOTE(review): `tinyhost` is an external command; presumably it publishes the
# generated report files -- confirm against its documentation.

set -euo pipefail

readonly DOCS_FOLDER="/home/ubuntu/s2pdf_dedupe_minhash_v1_with_no_pii/documents"
readonly REF_RULE="ft_lang_id_en_doc_v2__ft_lang_id_en_doc_v2__en:avg>0.5"
readonly HYP_RULE="ft_lang_id_en_doc_v2__ft_lang_id_en_doc_v2__en:avg>0.4"
# Single definition so the comparison step and the tinyhost step cannot drift.
readonly OUTPUT_DIR="results/pii_detection"

# Arguments live in an array rather than a backslash-continued command line:
# no trailing "\" can accidentally swallow the following line, and the rule
# strings (which contain ">") stay safely quoted.
comparison_args=(
  --docs-folder "${DOCS_FOLDER}"
  --ref-rule "${REF_RULE}"
  --hyp-rule "${HYP_RULE}"
  --output-dir "${OUTPUT_DIR}"
)

python scripts/pii_rule_comparison.py "${comparison_args[@]}"

# Expand the glob once. If nothing matched, bash leaves the literal pattern in
# the array, so test the first entry for existence instead of handing a bogus
# path to tinyhost.
report_files=("${OUTPUT_DIR}"/*)
if [[ ! -e "${report_files[0]}" ]]; then
  printf 'error: no files found in %s; nothing to pass to tinyhost\n' \
    "${OUTPUT_DIR}" >&2
  exit 1
fi

tinyhost "${report_files[@]}"