mirror of
https://github.com/allenai/olmocr.git
synced 2026-01-03 10:47:42 +00:00
help text
This commit is contained in:
parent
7dbcbc154b
commit
77f0b9fa84
@ -550,8 +550,8 @@ if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(description='Manager for running millions of PDFs through a batch inference pipeline')
|
||||
parser.add_argument('workspace', help='The S3 path where work will be done e.g., s3://bucket/prefix/)')
|
||||
parser.add_argument('--add_pdfs', help='Path to add pdfs stored in s3 to the workspace, can be a glob path s3://bucket/prefix/*.pdf or path to file containing list of pdf paths', default=None)
|
||||
parser.add_argument('--target_longest_image_dim', type=int, help='Dimension to use for rendering image', default=1024)
|
||||
parser.add_argument('--target_anchor_text_len', type=int, help='Maximum amount of anchor text to use', default=6000)
|
||||
parser.add_argument('--target_longest_image_dim', type=int, help='Dimension on longest side to use for rendering the pdf pages', default=1024)
|
||||
parser.add_argument('--target_anchor_text_len', type=int, help='Maximum amount of anchor text to use (characters)', default=6000)
|
||||
parser.add_argument('--workspace_profile', help='S3 configuration profile for accessing the workspace', default=None)
|
||||
parser.add_argument('--pdf_profile', help='S3 configuration profile for accessing the raw pdf documents', default=None)
|
||||
parser.add_argument('--max_size_mb', type=int, default=250, help='Max file size in MB')
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user