From ab7fafcb41c055ce9ae2eeff690de018dd89fcd8 Mon Sep 17 00:00:00 2001 From: ryannikolaidis <1208590+ryannikolaidis@users.noreply.github.com> Date: Tue, 22 Aug 2023 11:20:26 -0700 Subject: [PATCH] doc: add pdf extra note (#1165) --- CHANGELOG.md | 3 ++- docs/source/introduction/getting_started.rst | 8 +++----- unstructured/__version__.py | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0926c0aaf..539d03af9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## 0.10.5-dev3 +## 0.10.5-dev4 ### Enhancements * Create new CI Pipelines @@ -7,6 +7,7 @@ * `partition` raises and error and tells the user to install the appropriate extra if a filetype is detected that is missing dependencies. * Add custom errors to ingest +* Add notes on extra installs to docs ## 0.10.3 diff --git a/docs/source/introduction/getting_started.rst b/docs/source/introduction/getting_started.rst index d42cc33c0..60e9daadd 100644 --- a/docs/source/introduction/getting_started.rst +++ b/docs/source/introduction/getting_started.rst @@ -58,9 +58,7 @@ The example documents in this section come from the directory in the ``unstructured`` repo. Before running the code in this make sure you've installed the ``unstructured`` library -and all dependencies using the instructions in the **Quick Start** section. - - +and all dependencies using the instructions in the `Quick Start `_ section. Partitioning a document ~~~~~~~~~~~~~~~~~~~~~~~ @@ -164,7 +162,7 @@ of the table will be available in the element metadata under ``element.metadata. table extraction is available, the ``partition`` function will extract tables automatically if they are present. For PDFs and images, table extraction requires a relatively expensive call to a table recognition model, and so for those document types table extraction is an option you need to enable. If you would like to extract tables for PDFs or images, -pass in ``infer_table_structured=True``. 
Here is an example: +pass in ``infer_table_structure=True``. Here is an example (Note: this example requires the ``pdf`` extra. This can be installed with ``pip install "unstructured[pdf]"``): .. code:: python @@ -257,7 +255,7 @@ looks like the following: from unstructured.partition.auto import partition from unstructured.staging.base import elements_to_json - input_filename = "example-10k.html" + input_filename = "example-docs/example-10k.html" output_filename = "outputs.json" elements = partition(filename=input_filename) diff --git a/unstructured/__version__.py b/unstructured/__version__.py index e575866c4..acfbd6c68 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.10.5-dev3" # pragma: no cover +__version__ = "0.10.5-dev4" # pragma: no cover